The R markdown code that generated this document is licensed under an MIT software license.
The document itself, and all graphs, tables, figures and text within it, is licensed under a Creative Commons Attribution 4.0 International license (CC-BY).
# set working directory, load libraries, set-up
#wd<-"/Users/marc/work/MLW_LSTM/CRYPTOFAZ"
#setwd(wd)
# date-stamped (YYYYMMDD) output directory under ../RESULTS
outDir<-paste(sep="","../RESULTS/",gsub(pattern="-",replacement="",Sys.Date()))
# input directories: dated snapshots of the raw data deliveries
inDir<-"../../DATA/20190820"
inDir2<-"../../DATA/20190815"
inDir3<-"../../DATA/20200401"
inDir4<-"../20201020_PreScreenData"
inDir5<-"../20200804_DummyPID"
if(!dir.exists(outDir)){dir.create(outDir)}
# input files: three ELISA data deliveries, qPCR data, qPCR Ct values,
# pre-screening RDT/PCR results, and the dummy-PID to real-PID mapping
elisaFile1<-paste(sep="/",inDir,"CRYPTOFAZ ELISA DATA 02_01MAY2019.csv")
elisaFile2<-paste(sep="/",inDir,"CRYPTOFAZ ELISA DATA_27AUG2018.csv")
elisaFile3<-paste(sep="/",inDir,"CRYPTOFAZ ELISA DATA3_29APR2019.csv")
pcrFile<-paste(sep="/",inDir2,"pcr_datatransfer.csv")
pcrCtFile<-paste(sep="/",inDir3,"PCRResults-Ct.update.csv")
preScreenFile<-paste(sep="/",inDir4,"RDT-PCR_complete.csv")
dummyPidFile<-paste(sep="/",inDir5,"dummyPID_realPID.csv")
# date-stamped output file names
outFileElisa<-paste(sep="",outDir,"/cryptofazElisaNormalised_",Sys.Date(),".csv")
outFileMergedData<-paste(sep="",outDir,"/cryptofazPcrElisaMerged_",Sys.Date(),".csv")
outPrefix<-paste(sep="",outDir,"/cryptofazElisaPcr_",Sys.Date())
#logFile<-paste(sep="",outDir,"/cryptofazLogFile_",gsub(Sys.Date(),pattern="-",replacement=""),".log")
# empty string makes cat() write log messages to stdout instead of a file
logFile<-""
cat(file=logFile,append=F,paste(sep="","This is cryptofazELISA_normalisation20200318.Rmd.\n\nInput parameters:\n\telisaFile1 = < ",elisaFile1," >,\n\telisaFile2 = < ",elisaFile2," >,\n\telisafile3 = < ",elisaFile3," >,\n\tpcrFile = < ",pcrFile," >,\n\tpcrCtFile = < ",pcrCtFile," >,\n\tpreScreenFile = < ",preScreenFile,">,\n\toutFileElisa = < ",outFileElisa," >,\n\toutFileMergedData = < ",outFileMergedData," >.\n\n"))
## This is cryptofazELISA_normalisation20200318.Rmd.
##
## Input parameters:
## elisaFile1 = < ../../DATA/20190820/CRYPTOFAZ ELISA DATA 02_01MAY2019.csv >,
## elisaFile2 = < ../../DATA/20190820/CRYPTOFAZ ELISA DATA_27AUG2018.csv >,
## elisafile3 = < ../../DATA/20190820/CRYPTOFAZ ELISA DATA3_29APR2019.csv >,
## pcrFile = < ../../DATA/20190815/pcr_datatransfer.csv >,
## pcrCtFile = < ../../DATA/20200401/PCRResults-Ct.update.csv >,
## preScreenFile = < ../20201020_PreScreenData/RDT-PCR_complete.csv>,
## outFileElisa = < ../RESULTS/20201020/cryptofazElisaNormalised_2020-10-20.csv >,
## outFileMergedData = < ../RESULTS/20201020/cryptofazPcrElisaMerged_2020-10-20.csv >.
# read the data
# (stringsAsFactors=F keeps all ID / text columns as character vectors)
eli1<-read.csv(elisaFile1,stringsAsFactors=F)
eli2<-read.csv(elisaFile2,stringsAsFactors=F)
eli3<-read.csv(elisaFile3,stringsAsFactors=F)
pcr<-read.csv(pcrFile,stringsAsFactors=F)
pcrCt<-read.csv(pcrCtFile,stringsAsFactors=F)
preScreen<-read.csv(preScreenFile,stringsAsFactors=F)
dummyPID<-read.csv(dummyPidFile,stringsAsFactors=F)
# log the dimensions of each input table
cat(file=logFile,append=T,paste(sep="","Reading the data.\n",
"..ELISA file 1: ",nrow(eli1)," rows, ",ncol(eli1)," columns \n",
"..ELISA file 2: ",nrow(eli2)," rows, ",ncol(eli2)," columns \n",
"..ELISA file 3: ",nrow(eli3)," rows, ",ncol(eli3)," columns \n",
"..PCR file: ",nrow(pcr)," rows, ",ncol(pcr)," columns \n",
"..PCR Ct file: ",nrow(pcrCt)," rows, ",ncol(pcrCt)," columns \n",
"..Pre-screening file: ",nrow(preScreen)," rows, ",ncol(preScreen)," columns \n",
"..Dummy PID file: ",nrow(dummyPID)," rows, ",ncol(dummyPID)," columns \n",
"Done.\n\n"))
## Reading the data.
## ..ELISA file 1: 43 rows, 5 columns
## ..ELISA file 2: 33 rows, 4 columns
## ..ELISA file 3: 76 rows, 5 columns
## ..PCR file: 190 rows, 34 columns
## ..PCR Ct file: 428 rows, 7 columns
## ..Pre-screening file: 586 rows, 7 columns
## ..Dummy PID file: 22 rows, 2 columns
## Done.
# fix the Ct values (recode 'Undetermined' values)
pcrCt$Ct[pcrCt$Ct=="Undetermined"]<-"40" # check with James and Darwin whether 'Undetermined' should be NA or a very high Ct value
pcrCt$Ct<-as.numeric(pcrCt$Ct)
# tidy the Time.point labels: strip trailing spaces, replace remaining
# spaces with underscores, lower-case; then keep only first-morning /
# single-sample rows
pcrCt$Time.point<-gsub(pattern=" $",replacement="",pcrCt$Time.point)
pcrCt$Time.point<-gsub(pattern=" ",replacement="_",pcrCt$Time.point)
pcrCt$Time.point<-tolower(pcrCt$Time.point)
pcrCt<-pcrCt[is.element(el=pcrCt$Time.point,set=c("1st_am","only")),]
pcrCt$Lab.Sample.ID<-pcrCt$Sample.ID
pcrCt$Visit[pcrCt$Lab.Sample.ID=="CDE102F1"]<-6 # manually correcting the Visit number for this sample; see email from James Nyirenda 10/07/2020 that the visit was incorrectly recorded for this sample as it was run on a different experiment
# remove a pre-screening row with duplicated PATID and NA qPCR.Ct value
preScreen<-preScreen[!(preScreen$PATID=="CFZ0010019" & is.na(preScreen$qPCR.Ct)),]
# normalise the result strings: remove spaces, lower-case, recode "" as NA
preScreen$RDT.Result<-tolower(gsub(pattern=" ",replacement="",preScreen$RDT.Result))
preScreen$qPCR.Result<-tolower(gsub(pattern=" ",replacement="",preScreen$qPCR.Result))
preScreen$RDT.Result[preScreen$RDT.Result==""]<-NA
preScreen$qPCR.Result[preScreen$qPCR.Result==""]<-NA
# stricter positivity call at Ct<32 (NA Ct propagates to NA result)
preScreen$qPCR.Result32<-ifelse(preScreen$qPCR.Ct<32,"positive","negative")
# normalise
# Normalised OD = (OD - neg. control OD) / (pos. control OD - neg. control OD),
# done per series because each ELISA file uses different column names
eli1$OD.norm<-(eli1$Optical.Density-eli1$Optical.Density[eli1$Sample.Lab.ID=="Negative Control"])/(eli1$Optical.Density[eli1$Sample.Lab.ID=="Positive Control"]-eli1$Optical.Density[eli1$Sample.Lab.ID=="Negative Control"])
eli2$OD.norm<-(eli2$OPTICAL.DENSITY-eli2$OPTICAL.DENSITY[eli2$SAMPLE.ID=="Negative Control"])/(eli2$OPTICAL.DENSITY[eli2$SAMPLE.ID=="Positive Control"]-eli2$OPTICAL.DENSITY[eli2$SAMPLE.ID=="Negative Control"])
eli3$OD.norm<-(eli3$Absorbance-eli3$Absorbance[eli3$Sample.ID=="Negative Control"])/(eli3$Absorbance[eli3$Sample.ID=="Positive Control"]-eli3$Absorbance[eli3$Sample.ID=="Negative Control"])
cat(file=logFile,append=T,"Normalising the ELISA OD data.\nDone.\n\n")
# combine the data
# Stack the three ELISA series into one data frame with harmonised column
# names (each source file uses different header spellings).
elisa<-data.frame(
Participant.ID=c(eli1$Participant.ID,eli2$PARTICIPANT.ID,eli3$Sample.ID.1),
Sample.ID=c(eli1$Sample.Lab.ID,eli2$SAMPLE.ID,eli3$Sample.ID),
Visit=c(eli1$Visit,eli2$VISIT,eli3$Visit),
Time.Point=c(eli1$Time.point,rep(NA,nrow(eli2)),eli3$Time.Point),
Series=c(rep("S1",nrow(eli1)),rep("S2",nrow(eli2)),rep("S3",nrow(eli3))),
OD=c(eli1$Optical.Density,eli2$OPTICAL.DENSITY,eli3$Absorbance),
OD.norm=c(eli1$OD.norm,eli2$OD.norm,eli3$OD.norm)
)
elisa$Participant.ID<-as.character(elisa$Participant.ID)
elisa$Sample.ID<-as.character(elisa$Sample.ID)
# recode the various missing-value spellings as NA
elisa[elisa==""]<-NA
elisa[elisa=="N/A"]<-NA
elisa[elisa=="None"]<-NA
# give the controls unique, series-specific IDs
# (Participant.ID must be set before Sample.ID is overwritten)
elisa$Participant.ID[elisa$Sample.ID=="Positive Control"]<-paste(sep=".","PosCtrl",elisa$Series[elisa$Sample.ID=="Positive Control"])
elisa$Sample.ID[elisa$Sample.ID=="Positive Control"]<-paste(sep=".","PosCtrl",elisa$Series[elisa$Sample.ID=="Positive Control"])
elisa$Participant.ID[elisa$Sample.ID=="Negative Control"]<-paste(sep=".","NegCtrl",elisa$Series[elisa$Sample.ID=="Negative Control"])
elisa$Sample.ID[elisa$Sample.ID=="Negative Control"]<-paste(sep=".","NegCtrl",elisa$Series[elisa$Sample.ID=="Negative Control"])
# Note the positive / negative result is done on the un-normalised ELISA data
# The threshold for dual channel is 0.09 (negative below, positive above), see DATA/20190820/pt5014insert_rev_0806.pdf
elisa$Result<-ifelse(elisa$OD>=0.09,"positive","negative")
# build a common "<participant>.Visit<visit>" key across the three tables
elisa$Sample.ID.alt<-paste(sep=".Visit",elisa$Participant.ID,elisa$Visit)
pcr$Sample.ID<-paste(sep=".Visit",pcr$PATID,pcr$Visit)
pcrCt$Sample.ID<-paste(sep=".Visit",pcrCt$Participant.ID,pcrCt$Visit)
# BUG FIX: previously referenced elisa$Sample.ID.Alt (capital A), which is
# NULL because the column created above is Sample.ID.alt -- ELISA-only
# samples were silently dropped from the merge key. NOTE: nrow(dat) may
# therefore differ from the counts logged in earlier runs.
allSampIDs<-sort(unique(c(pcr$Sample.ID,pcrCt$Sample.ID,elisa$Sample.ID.alt)))
# outer merge of the three tables on the common key, with source prefixes
dat<-cbind(pcr[match(allSampIDs,pcr$Sample.ID),],pcrCt[match(allSampIDs,pcrCt$Sample.ID),],elisa[match(allSampIDs,elisa$Sample.ID.alt),])
colnames(dat)<-c(paste(sep=".","PCR",colnames(pcr)),paste(sep=".","PCRCT",colnames(pcrCt)),paste(sep=".","ELISA",colnames(elisa)))
# qPCR positivity at the standard (Ct<35) and stricter (Ct<32) cut-offs
dat$PCRCT.Result<-factor(ifelse(dat$PCRCT.Ct<35,"positive","negative"),levels=c("positive","negative"))
dat$PCRCT.Result32<-factor(ifelse(dat$PCRCT.Ct<32,"positive","negative"),levels=c("positive","negative"))
cat(file=logFile,append=T,paste(sep="","Combining the ELISA data from the 3 series.\n",
"..output will have ",nrow(elisa)," rows, ",ncol(elisa)," columns.\n\n",
"Combining the ELISA, PCR and PCR Ct data.\n",
"..the combined data will consist of ",nrow(dat)," rows, ",ncol(dat)," columns.\n\n",
"Done.\n\n"))
## Combining the ELISA data from the 3 series.
## ..output will have 152 rows, 9 columns.
##
## Combining the ELISA, PCR and PCR Ct data.
## ..the combined data will consist of 190 rows, 54 columns.
##
## Done.
# add the ELISA positive / negative results to the pre-screening data
# (pre-screening corresponds to Visit -1)
elisaTmp<-elisa[elisa$Visit=="-1",]
preScreen$ELISA.Result<-elisaTmp$Result[match(preScreen$PATID,elisaTmp$Participant.ID)]
preScreen$ELISA.OD<-elisaTmp$OD[match(preScreen$PATID,elisaTmp$Participant.ID)]
# BUG FIX: ELISA.OD.norm was previously copied from elisaTmp$OD (raw OD),
# so the normalised and raw OD columns were identical; use OD.norm instead
preScreen$ELISA.OD.norm<-elisaTmp$OD.norm[match(preScreen$PATID,elisaTmp$Participant.ID)]
rm(elisaTmp)
# adding dummy PIDs
# map each real participant ID to its anonymised (dummy) PID; unmapped IDs get NA
dat$dummyPID<-dummyPID$PCR.PATID2[match(dat$PCR.PATID,dummyPID$PCR.PATID)]
# writing output
# normalised ELISA table and merged ELISA/PCR table, both date-stamped
write.csv(elisa,row.names=F,file=outFileElisa)
write.csv(dat,row.names=F,file=outFileMergedData)
cat(file=logFile,append=T,"Writing merged data to disk.\nDone.\n\n")
## Writing merged data to disk.
## Done.
# Summary statistics for one numeric column of a data frame.
#
# dat        : data frame
# var        : name of the column to summarise
# thr        : optional numeric vector of thresholds
# thrLowHigh : comparison used against each threshold -- one of
#              "lt" (<), "leq" (<=), "gt" (>), "geq" (>=)
#
# Returns a character vector:
#   N (non-NA count), "(N/nrow %)", median (2 dp), "[min,max]" (2 dp),
# followed, for each threshold, by the count n of non-NA values passing the
# comparison and "(n/N %)".
sumStat<-function(dat,var,thr=NULL,thrLowHigh="lt"){
  # fail early on an invalid comparison keyword; the previous if/else
  # chain silently fell through and errored with "object 'n' not found"
  thrLowHigh<-match.arg(thrLowHigh,c("lt","leq","gt","geq"))
  vals<-dat[,var]
  N<-sum(!is.na(vals))
  perc<-100*N/nrow(dat)
  med<-median(vals,na.rm=TRUE)
  rng<-range(vals,na.rm=TRUE)
  res<-c(
    N,
    paste(sep="","(",format(nsmall=1,round(digits=1,perc)),"%)"),
    format(nsmall=2,round(digits=2,med)),
    paste(sep="","[",paste(collapse=",",format(nsmall=2,round(digits=2,rng))),"]")
  )
  if(!is.null(thr)){
    cmp<-switch(thrLowHigh,
      lt=function(x,t) x<t,
      leq=function(x,t) x<=t,
      gt=function(x,t) x>t,
      geq=function(x,t) x>=t)
    for(t in thr){
      n<-sum(!is.na(vals) & cmp(vals,t))
      # threshold percentages are out of the non-NA count N, not nrow(dat)
      perc<-paste(sep="","(",format(nsmall=1,round(digits=1,100*n/N)),"%)")
      res<-c(res,n,perc)
    }
  }
  return(res)
}
# Build the measurement summary table. sumStat() returns a flat character
# vector in which the percentage entries contain "%"; grepl() on "%" is
# used below to split counts/medians/ranges (value1) from percentages (value2).
tmpCt<-sumStat(dat=dat,var="PCRCT.Ct",thr=c(32,35),thrLowHigh="lt")
tmpLog2<-sumStat(dat=dat,var="PCR.log2_first")
tmpOD<-sumStat(dat=dat,var="ELISA.OD",thr=0.09,thrLowHigh="geq")
tmpODnorm<-sumStat(dat=dat,var="ELISA.OD.norm")
sumTab<-data.frame(
assay=c(rep("qPCR",8),rep("ELISA",7)),
measurement=c(rep("Ct",5),rep("log2(oocyst / gram)",3),rep("OD",4),rep("normalised OD",3)),
groups=c(rep("",3),"Ct<32","Ct<35",rep("",6),"OD >= 0.09",rep("",3)),
statistic=c("N (%)","Median","Range","n (% out of N)","n (% out of N)","N (%)","Median","Range","N (%)","Median","Range","n (% out of N)","N (%)","Median","Range"),
value1=c(tmpCt[!grepl(tmpCt,pattern="%")],
tmpLog2[!grepl(tmpLog2,pattern="%")],
tmpOD[!grepl(tmpOD,pattern="%")],
tmpODnorm[!grepl(tmpODnorm,pattern="%")]),
value2=c(tmpCt[grepl(tmpCt,pattern="%")][1],"","",tmpCt[grepl(tmpCt,pattern="%")][2:3],tmpLog2[grepl(tmpLog2,pattern="%")],"","",tmpOD[grepl(tmpOD,pattern="%")][1],"","",tmpOD[grepl(tmpOD,pattern="%")][2],tmpODnorm[grepl(tmpODnorm,pattern="%")],"","")
)
sumTab<-sumTab[,-1] # first column wasn't needed when using kable_styling; quicker to just get rid of it like this rather than edit the above code
# render with kableExtra, grouping the rows by assay
kable(sumTab,caption="Summaries of qPCR and ELISA measurements.",col.names=NULL) %>%
kable_styling(full_width=F) %>%
pack_rows("qPCR", 1, 8) %>%
pack_rows("ELISA", 9, 15) %>%
column_spec(1, bold = T) %>%
collapse_rows(columns = 1, valign = "top")
| qPCR | ||||
| Ct | N (%) | 180 | (94.7%) | |
| Median | 29.10 | |||
| Range | [21.24,40.00] | |||
| Ct<32 | n (% out of N) | 147 | (81.7%) | |
| Ct<35 | n (% out of N) | 168 | (93.3%) | |
| log2(oocyst / gram) | N (%) | 180 | (94.7%) | |
| Median | 14.03 | |||
| Range | [ 3.82,22.34] | |||
| ELISA | ||||
| OD | N (%) | 141 | (74.2%) | |
| Median | 0.00 | |||
| Range | [0.00,3.07] | |||
| OD >= 0.09 | n (% out of N) | 43 | (30.5%) | |
| normalised OD | N (%) | 141 | (74.2%) | |
| Median | 0.00 | |||
| Range | [0.00,2.24] | |||
Note that qPCR Ct values are right-censored with an upper limit of detection of \(Ct\geq40\) and ELISA OD values are left-censored with a lower limit of detection (\(OD<0.09\)). If means and variances are estimated directly from the data, then bias will be introduced. For this reason we use methods from survival analysis to take the censoring into account.
The most common method for this is estimation of the restricted mean and the associated standard error can be converted to an estimate of the standard deviation or variance. However this requires providing both the detection limit and the lowest (for ELISA OD) or highest (for qPCR Ct) possible value. For OD, this is simple: 0. For Ct, it is a little trickier, as it could be an infinite value.
Further, we would need to use unnormalised ELISA OD and raw qPCR Ct values as the normalised ELISA OD and the log2 oocyst count would have different detection limits for different samples.
2 main choices here:
Use raw qPCR 1/Ct and unnormalised ELISA OD, both with minimum value 0 and detection limits 1/40 and 0.09 respectively.
Use the processed log2 oocyst count and normalised ELISA OD values, but estimate variability only for values above the detection limit.
Note in both cases, for ELISA only positive samples would be used, while for qPCR both positive and negative (\(Ct\geq35\) but \(<40\)) would be used. Using raw / unnormalised data would have extra variation due to technical effects from the different experiments but would allow to account for the detection limit in a principled fashion. Using the log2 oocyst and normalised OD values would be rid of some of the technical variation but since different samples will have different detection limits, would make it difficult / impossible to account for the detection limits…
# Detectable values only. NOTE: these assignments overwrite the 'pcr' and
# 'elisa' data frames read earlier with plain numeric vectors; anything
# below that needs the original tables must use 'dat' instead.
pcr<-with(dat,PCR.log2_first[!is.na(PCRCT.Ct) & PCRCT.Ct<40])     # qPCR: Ct below censoring limit
elisa<-with(dat,ELISA.OD.norm[!is.na(ELISA.OD) & ELISA.OD>=0.09]) # ELISA: raw OD at/above 0.09
qPCR log2 oocyst count per gram: overall mean = 14.17, overall sd = 3.47, overall CV = 0.24.
ELISA normalised OD: overall mean = 0.68, overall sd = 0.53, overall CV = 0.79.
# Intra-individual variability of qPCR: per-participant mean, SD and CV of
# log2 oocyst counts, using detectable samples only (Ct<40)
ttPCR<-dat %>%
group_by(PCR.PATID) %>%
summarise(mean=mean(PCR.log2_first[!is.na(PCRCT.Ct) & PCRCT.Ct<40],na.rm=T),sd=sd(PCR.log2_first[!is.na(PCRCT.Ct) & PCRCT.Ct<40],na.rm=T)) %>%
mutate(CV=sd/mean)
#kable(ttPCR,caption="Mean, SD and CV of qPCR log2 oocyst count per gram values for each participant.")
The intra-individual CV of qPCR log2 oocyst count per gram is 0.17.
# Intra-individual variability of ELISA: per-participant mean, SD and CV of
# normalised OD, positive samples only (raw OD >= 0.09); abs(mean) keeps the
# CV positive when the normalised mean is negative
ttELISA<-dat %>%
group_by(PCR.PATID) %>%
summarise(mean=mean(ELISA.OD.norm[!is.na(ELISA.OD) & ELISA.OD>=0.09],na.rm=T),sd=sd(ELISA.OD.norm[!is.na(ELISA.OD) & ELISA.OD>=0.09],na.rm=T)) %>%
mutate(CV=sd/abs(mean))
#kable(ttELISA,caption="Mean, SD and CV of ELISA normalised OD values for each participant.")
The intra-individual CV of ELISA normalised OD is 0.54.
The mean of the participant qPCR log2 oocyst count means is 14.24, the SD of the participant qPCR log2 oocyst count means is 2.52 and the inter-individual CV of qPCR log2 oocyst count per gram is 0.18.
The mean of the participant ELISA OD means is 0.61, the SD of the participant ELISA OD means is 0.50 and the inter-individual CV of ELISA OD is 0.83.
# Partition the total sum of squares (SS) into within- (intra) and
# between-participant (inter) components, for qPCR and ELISA.
# Only detectable values are used (Ct<40 for qPCR, raw OD>=0.09 for ELISA).
ssTotPCR<-var(dat$PCR.log2_first[!is.na(dat$PCRCT.Ct) & dat$PCRCT.Ct<40],na.rm=T)*(sum(!is.na(dat$PCRCT.Ct) & dat$PCRCT.Ct<40)-1)
# per-participant mean log2 oocyst count (detectable samples only)
tmpDat<-dat %>%
filter(!is.na(PCRCT.Ct) & PCRCT.Ct<40) %>%
group_by(PCR.PATID) %>%
summarise(mean=mean(PCR.log2_first,na.rm=T))
tmpDat<-data.frame(PATID=dat$PCR.PATID,PCRCT.Ct=dat$PCRCT.Ct,PCR.log2_first=dat$PCR.log2_first,meanOocyst=tmpDat$mean[match(dat$PCR.PATID,tmpDat$PCR.PATID)]) %>% filter(!is.na(PCRCT.Ct) & PCRCT.Ct<40)
tmpDat$diffSq=(tmpDat$PCR.log2_first-tmpDat$meanOocyst)^2
ssIntraPCR<-sum(tmpDat$diffSq,na.rm=T)
ssInterPCR<-sum((tmpDat$meanOocyst-mean(tmpDat$PCR.log2_first,na.rm=T))^2)
ssTotELISA<-var(dat$ELISA.OD.norm[!is.na(dat$ELISA.OD) & dat$ELISA.OD>=0.09],na.rm=T)*(sum(!is.na(dat$ELISA.OD) & dat$ELISA.OD>=0.09)-1)
# per-participant mean normalised OD (positive samples only)
tmpDat<-dat %>%
filter(!is.na(ELISA.OD) & ELISA.OD>=0.09) %>% # fixed: reference the column directly inside filter(), not dat$ELISA.OD
group_by(PCR.PATID) %>%
summarise(mean=mean(ELISA.OD.norm,na.rm=T))
# BUG FIX: PATID was previously set to dat$ELISA.OD.norm (copy-paste error);
# the column was unused downstream but misleading
tmpDat<-data.frame(PATID=dat$PCR.PATID,ELISA.OD=dat$ELISA.OD,ELISA.OD.norm=dat$ELISA.OD.norm,meanOD=tmpDat$mean[match(dat$PCR.PATID,tmpDat$PCR.PATID)]) %>% filter(!is.na(ELISA.OD) & ELISA.OD>=0.09)
tmpDat$diffSq=(tmpDat$ELISA.OD.norm-tmpDat$meanOD)^2
ssIntraELISA<-sum(tmpDat$diffSq,na.rm=T)
ssInterELISA<-sum((tmpDat$meanOD-mean(tmpDat$ELISA.OD.norm,na.rm=T))^2)
For qPCR Ct, 50.77% of the total variation is due to within participant variation and 49.23% is due to between participant variation.
For ELISA normalised OD 36.52% of the total variation is due to within participant variation and 63.48% is due to between participant variation.
Generally, there is more variability in ELISA normalised OD measurements than in log2 oocyst count per gram. For log2 oocyst count, inter and intra CVs are virtually the same whereas for ELISA normalised OD, the inter-individual CV is larger than the intra-individual CV. This last conclusion is also confirmed by looking at the sum of squares (SS) within (intra) and between (inter) individuals: both are similar for qPCR, but the between-individual SS is larger than the within-individual SS.
# PCR log2(oocyst per gram in first stool) vs. ELISA normalised OD
# points coloured by treatment arm; dashed lines join samples from the
# same participant (only rows with both measurements present)
g<-ggplot(data=dat,mapping=aes(x=PCR.log2_first,y=ELISA.OD.norm,col=PCR.treatment,group=PCR.PATID)) +
geom_point(cex=3) +
geom_line(lwd=0.2,lty=2,data=dat[!is.na(dat$PCR.log2_first) & !is.na(dat$ELISA.OD.norm),]) +
scale_color_manual(values=c("steelblue","orange"))
print(g)
# profiles over time for log2(oocyst per gram in first stool) and normalised OD
# one line per participant, coloured by treatment arm, shaped by participant
g1<-ggplot(data=dat,mapping=aes(x=PCR.Visit,y=PCR.log2_first,col=PCR.treatment,pch=PCR.PATID,group=PCR.PATID)) +
geom_point() +
geom_line(data=dat[!is.na(dat$PCR.log2_first),]) +
scale_shape_manual(values=seq(0,22)) +
scale_color_manual(values=c("steelblue","orange")) +
theme(legend.position="top")
# drop missing-OD rows up front so the lines connect observed visits
datTmp<-dat[!is.na(dat$ELISA.OD.norm),]
g2<-ggplot(data=datTmp,mapping=aes(x=ELISA.Visit,y=ELISA.OD.norm,col=PCR.treatment,pch=ELISA.Participant.ID,group=ELISA.Participant.ID)) +
geom_point() +
geom_line(na.rm=T) +
scale_shape_manual(values=seq(0,22))+
scale_color_manual(values=c("steelblue","orange")) +
theme(legend.position="top")
# stacked display: qPCR profile on top, ELISA below
grid.arrange(g1,g2,nrow=2)
# Plot per-visit means with 95% t-based confidence intervals for the CFZ
# and Placebo arms of one variable, on a shared base-graphics axis.
#
# datCFZ, datPLCB : data frames for each treatment arm (need PCR.Visit and varName)
# varName         : name of the column to summarise
# mTitle          : plot title
# logScale        : if TRUE, plot geometric means with CIs computed on the log scale
# xLabel, yLabel  : axis labels
# yLimits         : y-axis range (length-2 numeric)
# days            : display labels for the x-axis ticks
# daysAlt         : the PCR.Visit values matching `days` position by position
#
# Placebo points/CIs are drawn offset by +0.1 on the x-axis so the two
# arms do not overplot. Side effect only (draws on the current device).
plotMeanCIs<-function(datCFZ,datPLCB,varName,mTitle,logScale=F,xLabel="Study Day",yLabel,yLimits,days=c(-1,1:6,"19-24","41-55"),daysAlt=c(-1,1:8)){
# per-visit means and standard errors for both arms
tmpMat<-matrix(nrow=4,ncol=length(days))
if(!logScale){
rownames(tmpMat)<-c("CFZ_mean","Placebo_mean","CFZ_SE","Placebo_SE")
}else{
rownames(tmpMat)<-c("CFZ_mean","Placebo_mean","CFZ_SElog","Placebo_SElog")
}
colnames(tmpMat)<-c(paste(sep="","day",days))
for(j in 1:length(days)){
if(!logScale){
tmpMat["CFZ_mean",j]<-mean(datCFZ[datCFZ$PCR.Visit==daysAlt[j],varName],na.rm=T)
tmpMat["CFZ_SE",j]<-sd(datCFZ[datCFZ$PCR.Visit==daysAlt[j],varName],na.rm=T)/sqrt(sum(!is.na(datCFZ[datCFZ$PCR.Visit==daysAlt[j],varName])))
tmpMat["Placebo_mean",j]<-mean(datPLCB[datPLCB$PCR.Visit==daysAlt[j],varName],na.rm=T)
tmpMat["Placebo_SE",j]<-sd(datPLCB[datPLCB$PCR.Visit==daysAlt[j],varName],na.rm=T)/sqrt(sum(!is.na(datPLCB[datPLCB$PCR.Visit==daysAlt[j],varName])))
}else{
# geometric mean and SE of the log values
tmpMat["CFZ_mean",j]<-exp(mean(log(datCFZ[datCFZ$PCR.Visit==daysAlt[j],varName]),na.rm=T))
tmpMat["CFZ_SElog",j]<-sd(log(datCFZ[datCFZ$PCR.Visit==daysAlt[j],varName]),na.rm=T)/sqrt(sum(!is.na(datCFZ[datCFZ$PCR.Visit==daysAlt[j],varName])))
tmpMat["Placebo_mean",j]<-exp(mean(log(datPLCB[datPLCB$PCR.Visit==daysAlt[j],varName]),na.rm=T))
tmpMat["Placebo_SElog",j]<-sd(log(datPLCB[datPLCB$PCR.Visit==daysAlt[j],varName]),na.rm=T)/sqrt(sum(!is.na(datPLCB[datPLCB$PCR.Visit==daysAlt[j],varName])))
}
}
# empty canvas, then means joined by dashed lines (skipping missing visits)
plot(type="n",c(1,length(days)),yLimits,xlab=xLabel,ylab=yLabel,main=mTitle,axes=F)
points(lty=2,col="steelblue",1:length(days),tmpMat["CFZ_mean",],cex=1.5)
idxNoNA<-sort(which(!is.na(tmpMat["CFZ_mean",])))
lines(lty=2,col="steelblue",(1:length(days))[idxNoNA],tmpMat["CFZ_mean",idxNoNA])
points(lty=2,col="orange",1:length(days)+0.1,tmpMat["Placebo_mean",],pch=22,cex=1.5)
idxNoNA<-sort(which(!is.na(tmpMat["Placebo_mean",])))
lines(lty=2,col="orange",(1:length(days))[idxNoNA]+0.1,tmpMat["Placebo_mean",idxNoNA])
# 95% CI whiskers per visit, using the t quantile with n-1 df per arm
for(j in 1:length(days)){
qCfz<-qt(0.975,df=sum(!is.na(datCFZ[datCFZ$PCR.Visit==daysAlt[j],varName]))-1)
qPlcb<-qt(0.975,df=sum(!is.na(datPLCB[datPLCB$PCR.Visit==daysAlt[j],varName]))-1)
if(!logScale){
segments(j,tmpMat["CFZ_mean",j],j,tmpMat["CFZ_mean",j]+qCfz*tmpMat["CFZ_SE",j],col="steelblue")
segments(j,tmpMat["CFZ_mean",j],j,tmpMat["CFZ_mean",j]-qCfz*tmpMat["CFZ_SE",j],col="steelblue")
segments(j-0.08,tmpMat["CFZ_mean",j]+qCfz*tmpMat["CFZ_SE",j],j+0.08,tmpMat["CFZ_mean",j]+qCfz*tmpMat["CFZ_SE",j],col="steelblue")
segments(j-0.08,tmpMat["CFZ_mean",j]-qCfz*tmpMat["CFZ_SE",j],j+0.08,tmpMat["CFZ_mean",j]-qCfz*tmpMat["CFZ_SE",j],col="steelblue")
segments(j+0.1,tmpMat["Placebo_mean",j],j+0.1,tmpMat["Placebo_mean",j]+qPlcb*tmpMat["Placebo_SE",j],col="orange")
segments(j+0.1,tmpMat["Placebo_mean",j],j+0.1,tmpMat["Placebo_mean",j]-qPlcb*tmpMat["Placebo_SE",j],col="orange")
segments(j+0.1-0.08,tmpMat["Placebo_mean",j]+qPlcb*tmpMat["Placebo_SE",j],j+0.1+0.08,tmpMat["Placebo_mean",j]+qPlcb*tmpMat["Placebo_SE",j],col="orange")
segments(j+0.1-0.08,tmpMat["Placebo_mean",j]-qPlcb*tmpMat["Placebo_SE",j],j+0.1+0.08,tmpMat["Placebo_mean",j]-qPlcb*tmpMat["Placebo_SE",j],col="orange")
}else{
# CI bounds computed on the log scale, then back-transformed
segments(j,tmpMat["CFZ_mean",j],j,exp(log(tmpMat["CFZ_mean",j])+qCfz*tmpMat["CFZ_SElog",j]),col="steelblue")
segments(j,tmpMat["CFZ_mean",j],j,exp(log(tmpMat["CFZ_mean",j])-qCfz*tmpMat["CFZ_SElog",j]),col="steelblue")
segments(j-0.08,exp(log(tmpMat["CFZ_mean",j])+qCfz*tmpMat["CFZ_SElog",j]),j+0.08,exp(log(tmpMat["CFZ_mean",j])+qCfz*tmpMat["CFZ_SElog",j]),col="steelblue")
segments(j-0.08,exp(log(tmpMat["CFZ_mean",j])-qCfz*tmpMat["CFZ_SElog",j]),j+0.08,exp(log(tmpMat["CFZ_mean",j])-qCfz*tmpMat["CFZ_SElog",j]),col="steelblue")
segments(j+0.1,tmpMat["Placebo_mean",j],j+0.1,exp(log(tmpMat["Placebo_mean",j])+qPlcb*tmpMat["Placebo_SElog",j]),col="orange")
segments(j+0.1,tmpMat["Placebo_mean",j],j+0.1,exp(log(tmpMat["Placebo_mean",j])-qPlcb*tmpMat["Placebo_SElog",j]),col="orange")
segments(j+0.1-0.08,exp(log(tmpMat["Placebo_mean",j])+qPlcb*tmpMat["Placebo_SElog",j]),j+0.1+0.08,exp(log(tmpMat["Placebo_mean",j])+qPlcb*tmpMat["Placebo_SElog",j]),col="orange")
segments(j+0.1-0.08,exp(log(tmpMat["Placebo_mean",j])-qPlcb*tmpMat["Placebo_SElog",j]),j+0.1+0.08,exp(log(tmpMat["Placebo_mean",j])-qPlcb*tmpMat["Placebo_SElog",j]),col="orange")
}
}
axis(side=1,at=1:length(days),labels=as.character(days))
axis(side=2)
box()
legend(x="topright",col=c("steelblue","orange"),legend=c("CFZ","Placebo"),lty=1,pch=c(1,22),horiz=T,pt.cex=1.5)
}
# mean +/- 95% CI profiles over time, by treatment arm
plotMeanCIs(datCFZ=dat[dat$PCR.treatment=="CFZ",],datPLCB=dat[dat$PCR.treatment=="Placebo",],varName="PCR.log2_first",mTitle="Mean log2 oocyst per gram over time.",yLabel="log2(oocyst per gram)",yLimits=range(dat$PCR.log2_first,na.rm=T))
plotMeanCIs(datCFZ=dat[dat$PCR.treatment=="CFZ",],datPLCB=dat[dat$PCR.treatment=="Placebo",],varName="ELISA.OD.norm",mTitle="Mean normalised ELISA optical density over time.",yLabel="Normalised OD",yLimits=c(-0.25,1)) # can't work on the log scale as ELISA data has actual zeroes in it
# change from baseline (CFB): the per-participant baseline is the mean of
# the visit -1 and visit 1 values when visit 1 is present, else visit -1 alone
dat$PCR.log2_first.cfb<-dat$PCR.log2_first
dat$ELISA.OD.norm.cfb<-dat$ELISA.OD.norm
for(pid in unique(dat$PCR.PATID)){
# NOTE(review): ifelse() assumes at most one row per participant/visit
# combination here -- confirm; with multiple rows only the first would be used
baseLog2<-ifelse(!is.na(dat$PCR.log2_first[dat$PCR.PATID==pid & dat$PCR.Visit==1]),(dat$PCR.log2_first[dat$PCR.PATID==pid & dat$PCR.Visit==-1]+dat$PCR.log2_first[dat$PCR.PATID==pid & dat$PCR.Visit==1])/2,dat$PCR.log2_first[dat$PCR.PATID==pid & dat$PCR.Visit==-1])
baseOD<-ifelse(!is.na(dat$ELISA.OD.norm[dat$PCR.PATID==pid & !is.na(dat$ELISA.Visit) & dat$ELISA.Visit==1]),(dat$ELISA.OD.norm[dat$PCR.PATID==pid & !is.na(dat$ELISA.Visit) & dat$ELISA.Visit==-1]+dat$ELISA.OD.norm[dat$PCR.PATID==pid & !is.na(dat$ELISA.Visit) & dat$ELISA.Visit==1])/2,dat$ELISA.OD.norm[dat$PCR.PATID==pid & !is.na(dat$ELISA.Visit) & dat$ELISA.Visit==-1])
dat$PCR.log2_first.cfb[dat$PCR.PATID==pid]<-dat$PCR.log2_first[dat$PCR.PATID==pid]-baseLog2
dat$ELISA.OD.norm.cfb[dat$PCR.PATID==pid]<-dat$ELISA.OD.norm[dat$PCR.PATID==pid]-baseOD
}
# CFB profiles, visits 2 onwards only (baseline visits excluded)
plotMeanCIs(datCFZ=dat[dat$PCR.Visit>=2 & dat$PCR.treatment=="CFZ",],datPLCB=dat[dat$PCR.Visit>=2 & dat$PCR.treatment=="Placebo",],varName="PCR.log2_first.cfb",mTitle="Mean log2 oocyst per gram over time (change from baseline).",yLabel="CFB log2(oocyst per gram)",yLimits=c(-7,5),days=c(2:6,"19-24","41-55"),daysAlt=2:8)
plotMeanCIs(datCFZ=dat[dat$PCR.Visit>=2 & dat$PCR.treatment=="CFZ",],datPLCB=dat[dat$PCR.Visit>=2 & dat$PCR.treatment=="Placebo",],varName="ELISA.OD.norm.cfb",mTitle="Mean CFB normalised ELISA optical density over time (change from baseline).",yLabel=" CFB Normalised OD",yLimits=c(-0.5,0.9),days=c(2:6,"19-24","41-55"),daysAlt=2:8)
# Compare qPCR Ct between ELISA-positive and ELISA-negative samples,
# accounting for repeated measures per participant, three ways:
# clustered rank-sum test
datTmp<-dat %>% filter(!is.na(ELISA.Result) & dat$PCRCT.Ct<40) %>% mutate(group=ifelse(ELISA.Result=="positive",1,0))
# Datta-Satten ("ds") clustered Wilcoxon rank-sum, participant as cluster
clusranksum<-clusWilcox.test(PCRCT.Ct~group+cluster(as.integer(factor(PCR.PATID))),data=datTmp,method="ds")
pvalClusRankSum<-clusranksum$p.value
# mixed model
# linear mixed model with a participant random intercept and compound-symmetry correlation
lmmRes<-lme(PCRCT.Ct~factor(ELISA.Result),random=~1|PCR.PATID,correlation=corCompSymm(form=~1|PCR.PATID),data=datTmp)
pvalMixed<-summary(lmmRes)$tTable[2,"p-value"]
# mixed censored regression
# panel Tobit model, right-censored at Ct=40; unlike the two tests above
# this keeps the censored (Ct=40) observations
pDat<-dat %>%
filter(!is.na(ELISA.Result)) %>%
mutate(group=ifelse(ELISA.Result=="positive",1,0)) %>%
pdata.frame(index=c("PCR.PATID","PCR.Visit"))
censRegMod<-censReg(PCRCT.Ct~group,data=pDat,right=40,method="BHHH",nGHQ=128) # https://cran.r-project.org/web/packages/censReg/vignettes/censReg.pdf
pvalMixedCensReg<-summary(censRegMod)$estimate["group","Pr(> t)"]
# Box + jitter plot of qPCR Ct by ELISA result, Ct=35 positivity cut-off.
# Points coloured by the joint PCR/ELISA status (no PCR(-)/ELISA(+)
# combination occurs at this cut-off, hence only 3 levels).
g<-dat %>%
filter(!is.na(ELISA.Result) & dat$PCRCT.Ct<=40) %>%
mutate(pcrElisa=factor(case_when(
ELISA.Result=="positive" & PCRCT.Result=="positive"~"PcrPosElisaPos",
ELISA.Result=="positive" & PCRCT.Result=="negative"~"PcrNegElisaPos",
ELISA.Result=="negative" & PCRCT.Result=="positive"~"PcrPosElisaNeg",
ELISA.Result=="negative" & PCRCT.Result=="negative"~"PcrNegElisaNeg"
),levels=c("PcrNegElisaNeg","PcrPosElisaNeg","PcrPosElisaPos"))) %>%
ggplot(aes(x=ELISA.Result, y=PCRCT.Ct, color=pcrElisa)) +
geom_boxplot(col="darkgrey",outlier.size=0,outlier.shape=NA, fill="grey95") +
geom_jitter(position=position_jitter(height=0,width=0.2),size=3) +
scale_color_manual(values=c("steelblue4","steelblue1","orange1"),labels=c("PCR(-) ELISA(-)","PCR(+) ELISA(-)","PCR(+) ELISA(+)"),name="") +
xlab("ELISA") +
ylab("qPCR Ct") +
ylim(c(20.5,40.75)) +
theme_light() +
theme(text=element_text(size=20),legend.position="bottom") +
# dashed reference line at the Ct=35 positivity threshold
geom_abline(slope=0,intercept=35,lty=2,col="grey35",lwd=1.25) +
ggtitle("qPCR Ct values according to ELISA positivity.") +
# annotate with the mixed censored-regression p-value computed above
geom_text(mapping=aes(x=1.5,y=40.75,label=paste(sep="","p = ",format(nsmall=4,round(digits=4,pvalMixedCensReg)))),size=8,col="grey35") +
geom_text(mapping=aes(x=0.55,y=35.5,label="Ct=35"),size=6,col="grey35")
print(g)
# save a high-resolution copy of the figure
png(file=paste(sep="",outPrefix,"_Fig1_BoxJitter_Ct35.png"),height=12,width=10,units="in",res=450)
print(g)
dev.off()
## quartz_off_screen
## 2
# Median and IQR of qPCR Ct by ELISA result (non-censored Ct values).
# Renamed the result from 'sumStat' to 'sumStatTab': the original
# assignment clobbered the sumStat() helper function defined earlier.
sumStatTab<-dat %>%
filter(!is.na(ELISA.Result) & dat$PCRCT.Ct<=40) %>%
group_by(ELISA.Result) %>%
summarise(median=format(nsmall=2,round(digits=2,median(PCRCT.Ct))),IQR=paste(sep="","(",paste(collapse=",",format(nsmall=2,round(digits=2,quantile(PCRCT.Ct,probs=c(0.25,0.75))))),")"))
print(sumStatTab)
## # A tibble: 2 x 3
## ELISA.Result median IQR
## <chr> <chr> <chr>
## 1 negative 29.75 (27.61,31.79)
## 2 positive 26.26 (23.91,28.51)
# NOTE(review): this chunk is an exact duplicate of the testing chunk above
# (clustered rank-sum, mixed model, censored regression) and recomputes the
# same p-values before the Ct=32 figure; consider removing the duplication.
# clustered rank-sum test
datTmp<-dat %>% filter(!is.na(ELISA.Result) & dat$PCRCT.Ct<40) %>% mutate(group=ifelse(ELISA.Result=="positive",1,0))
clusranksum<-clusWilcox.test(PCRCT.Ct~group+cluster(as.integer(factor(PCR.PATID))),data=datTmp,method="ds")
pvalClusRankSum<-clusranksum$p.value
# mixed model
lmmRes<-lme(PCRCT.Ct~factor(ELISA.Result),random=~1|PCR.PATID,correlation=corCompSymm(form=~1|PCR.PATID),data=datTmp)
pvalMixed<-summary(lmmRes)$tTable[2,"p-value"]
# mixed censored regression
pDat<-dat %>%
filter(!is.na(ELISA.Result)) %>%
mutate(group=ifelse(ELISA.Result=="positive",1,0)) %>%
pdata.frame(index=c("PCR.PATID","PCR.Visit"))
censRegMod<-censReg(PCRCT.Ct~group,data=pDat,right=40,method="BHHH",nGHQ=128) # https://cran.r-project.org/web/packages/censReg/vignettes/censReg.pdf
pvalMixedCensReg<-summary(censRegMod)$estimate["group","Pr(> t)"]
# Box + jitter plot of qPCR Ct by ELISA result, Ct=32 positivity cut-off.
# BUG FIX: the pipeline result was previously not assigned, so print(g)
# below and the "_Ct32" PNG re-used the stale Ct=35 figure object from the
# previous chunk; assign the new figure to g first.
g<-dat %>%
filter(!is.na(ELISA.Result) & dat$PCRCT.Ct<=40) %>%
mutate(pcrElisa=factor(case_when(
ELISA.Result=="positive" & PCRCT.Result32=="positive"~"PcrPosElisaPos",
ELISA.Result=="positive" & PCRCT.Result32=="negative"~"PcrNegElisaPos",
ELISA.Result=="negative" & PCRCT.Result32=="positive"~"PcrPosElisaNeg",
ELISA.Result=="negative" & PCRCT.Result32=="negative"~"PcrNegElisaNeg"
),levels=c("PcrNegElisaNeg","PcrPosElisaNeg","PcrNegElisaPos","PcrPosElisaPos"))) %>%
ggplot(aes(x=ELISA.Result, y=PCRCT.Ct, color=pcrElisa)) +
geom_boxplot(col="darkgrey",outlier.size=0,outlier.shape=NA, fill="grey95") +
geom_jitter(position=position_jitter(height=0,width=0.2),size=3) +
scale_color_manual(values=c("steelblue4","steelblue1","orange4","orange1"),labels=c("PCR(-) ELISA(-)","PCR(+) ELISA(-)","PCR(-) ELISA(+)","PCR(+) ELISA(+)"),name="") +
xlab("ELISA") +
ylab("qPCR Ct") +
ylim(c(20.5,40.75)) +
theme_light() +
theme(text=element_text(size=20),legend.position="bottom") +
# dashed reference line at the Ct=32 positivity threshold
geom_abline(slope=0,intercept=32,lty=2,col="grey35",lwd=1.25) +
ggtitle("qPCR Ct values according to ELISA positivity.") +
geom_text(mapping=aes(x=1.5,y=40.75,label=paste(sep="","p = ",format(nsmall=4,round(digits=4,pvalMixedCensReg)))),size=8,col="grey35") +
geom_text(mapping=aes(x=0.55,y=32.5,label="Ct=32"),size=6,col="grey35")
print(g)
# save a high-resolution copy of the (Ct=32) figure
png(file=paste(sep="",outPrefix,"_Fig1_BoxJitter_Ct32.png"),height=12,width=10,units="in",res=450)
print(g)
dev.off()
## quartz_off_screen
## 2
The p-value reported on the figures above is from a mixed censored regression model and is the p-value testing the null hypothesis that the fixed effect coefficient for the grouping variable (ELISA positivity) is zero. This was implemented using the function censReg() from the censReg package in R (Henningsen, A., censReg: Censored Regression (Tobit) Models, v0.5-30, https://CRAN.R-project.org/package=censReg, 2019).
Summarising the data in a 2x2 table (note this is only for summary; any usual contingency test based on this table will not be valid due to multiple observations per patient):
# 2x2 table of qPCR (Ct<35) vs ELISA positivity; rows flipped so the
# "negative" qPCR row is displayed first
tt<-table(dat$PCRCT.Result,dat$ELISA.Result)
tt<-data.frame(var1="qPCR",qPCR=rownames(tt),ELISAneg=tt[,"negative"],ELISApos=tt[,"positive"])
tt<-tt[2:1,]
kable(tt,caption="",row.names=F,col.names=c("","","negative","positive")) %>%
kable_styling(full_width = F) %>%
add_header_above(c(" " = 2, "ELISA" = 2)) %>%
column_spec(2, bold = T) %>%
collapse_rows(columns = 1, valign = "middle")
| negative | positive | ||
|---|---|---|---|
| qPCR | negative | 9 | 0 |
| positive | 89 | 43 |
# same 2x2 table, but using the stricter Ct<32 qPCR positivity cut-off
tt<-table(dat$PCRCT.Result32,dat$ELISA.Result)
tt<-data.frame(var1="qPCR",qPCR=rownames(tt),ELISAneg=tt[,"negative"],ELISApos=tt[,"positive"])
tt<-tt[2:1,]
kable(tt,caption="",row.names=F,col.names=c("","","negative","positive")) %>%
kable_styling(full_width = F) %>%
add_header_above(c(" " = 2, "ELISA" = 2)) %>%
column_spec(2, bold = T) %>%
collapse_rows(columns = 1, valign = "middle")
| negative | positive | ||
|---|---|---|---|
| qPCR | negative | 23 | 2 |
| positive | 75 | 41 |
#The same figure as above, but now with 3 columns, also separating according to qPCR positivity (note: no samples are ELISA positive but qPCR negative):
#
# dat<-dat %>%
# mutate(groupPcrElisa=case_when(
# dat$ELISA.Result=="positive" & dat$PCRCT.Result=="positive"~"ELISA positive\nqPCR positive",
# dat$ELISA.Result=="positive" & dat$PCRCT.Result=="negative"~"ELISA positive\nqPCR negative",
# dat$ELISA.Result=="negative" & dat$PCRCT.Result=="positive"~"ELISA negative\nqPCR positive",
# dat$ELISA.Result=="negative" & dat$PCRCT.Result=="negative"~"ELISA negative\nqPCR negative",
# ))
#
# dat %>%
# filter(!is.na(groupPcrElisa) & dat$PCRCT.Ct<40) %>%
# ggplot(aes(x=groupPcrElisa, y=PCRCT.Ct, col=groupPcrElisa)) +
# geom_boxplot(col="darkgrey",outlier.size=0,outlier.shape=NA) +
# geom_jitter(position=position_jitter(0.2),size=3) +
# scale_color_manual(values=c("steelblue4","steelblue","orange","yellow"),labels=NULL,breaks=NULL) +
# xlab("") +
# ylab("qPCR Ct") +
# ylim(c(20.5,40.25)) +
# theme(text=element_text(size=20)) +
# ggtitle("qPCR Ct values according to qPCR and ELISA positivity.")
Below are similar figures, but splitting on treatment group.
# Boxplot + jitter of qPCR Ct values by treatment arm, within one ELISA
# positivity stratum. Extracted as a local helper because the negative- and
# positive-stratum plots were identical up to stratum, colours and title.
#
# plotDat     = merged data (needs ELISA.Result, PCR.treatment, PCRCT.Ct)
# elisaStatus = "negative" or "positive" (ELISA.Result value to keep)
# cols        = two colours for the treatment arms
# plotTitle   = figure title
plotCtByTreatment <- function(plotDat, elisaStatus, cols, plotTitle) {
  plotDat %>%
    filter(!is.na(ELISA.Result) & ELISA.Result == elisaStatus) %>%
    ggplot(aes(x = PCR.treatment, y = PCRCT.Ct, col = PCR.treatment)) +
    geom_boxplot(col = "darkgrey", outlier.size = 0, outlier.shape = NA) +
    geom_jitter(position = position_jitter(0.2), size = 3) +
    scale_color_manual(values = cols, labels = NULL, breaks = NULL) +
    xlab("Treatment") +
    ylab("qPCR Ct") +
    ylim(c(20.5, 40.25)) +
    theme(text = element_text(size = 20)) +
    ggtitle(plotTitle)
}
g1 <- plotCtByTreatment(dat, "negative", c("steelblue", "steelblue2"),
                        "qPCR Ct values according to treatment\n(ELISA negatives).")
g2 <- plotCtByTreatment(dat, "positive", c("orange3", "orange"),
                        "qPCR Ct values according to treatment\n(ELISA positives).")
grid.arrange(g1, g2, nrow = 1)
plotqPCRELISATimeHeatmap<-function(plotDat,pid,xlab="",ylab="",ctThr=35){
# Two-row tile strip of one patient's time course: one row per assay (qPCR,
# ELISA). Tile shading encodes the measurement rescaled to [0.2,1]; tile label
# is the P/N positivity call. Returns a cowplot grid: annotation panel (ID,
# treatment, row labels) next to the tile panel.
#
# plotDat = merged PCR/ELISA data for ALL patients (columns selected below)
# pid     = patient ID (a level of PCR.PATID) to plot
# xlab, ylab = axis labels for the tile panel
# ctThr   = Ct threshold below which a qPCR result is called positive
#
# NOTE: rescaling is done BEFORE filtering to 'pid', so shades are on a common
# scale and comparable across patients.
plotDat<-plotDat %>%
mutate(PCR.log2_first.rs = scales::rescale(PCR.log2_first,to=c(0.2,1)),
ELISA.OD.norm.rs = scales::rescale(ELISA.OD.norm,to=c(0.2,1))) %>%
filter(PCR.PATID==pid) %>%
dplyr::select(PCR.PATID,PCR.Visit,PCR.treatment,PCRCT.Ct,PCRCT.Result,PCR.log2_first.rs,ELISA.OD.norm.rs,ELISA.Result)
# Re-derive the qPCR call from the raw Ct so the function's ctThr argument
# (not the precomputed PCRCT.Result) controls positivity.
plotDat$PCRCT.Result<-factor(ifelse(plotDat$PCRCT.Ct<ctThr,"P","N"),levels=c("P","N"))
plotDat$ELISA.Result<-factor(ifelse(plotDat$ELISA.Result=="positive","P","N"),levels=c("P","N"))
# Fix the visit order; visits 7 and 8 are relabelled as day ranges.
plotDat$PCR.Visit<-factor(plotDat$PCR.Visit,levels=c("-1","1","2","3","4","5","6","7","8"))
levels(plotDat$PCR.Visit)[levels(plotDat$PCR.Visit)=="7"]<-"19-24"
levels(plotDat$PCR.Visit)[levels(plotDat$PCR.Visit)=="8"]<-"41-55"
# Long format for the shaded values (one row per visit x assay).
plotDatWide1<-plotDat %>%
dplyr::select(PCR.PATID,PCR.Visit,PCR.treatment,PCR.log2_first.rs,ELISA.OD.norm.rs) %>%
pivot_longer(cols=c("PCR.log2_first.rs","ELISA.OD.norm.rs"),names_to="assay")
plotDatWide1$assay<-factor(ifelse(plotDatWide1$assay=="PCR.log2_first.rs","qPCR","ELISA"))
# Long format for the P/N calls, in the same row order as plotDatWide1.
plotDatWide2<-plotDat %>%
dplyr::select(PCR.PATID,PCR.Visit,PCR.treatment,PCRCT.Result,ELISA.Result) %>%
pivot_longer(cols=c("PCRCT.Result","ELISA.Result"),names_to="assay")
plotDatWide2$assay<-factor(ifelse(plotDatWide2$assay=="PCRCT.Result","qPCR","ELISA"))
# Attach the positivity call column-wise (relies on identical row ordering of
# the two pivoted frames).
plotDatWide<-plotDatWide1
plotDatWide$positivity<-plotDatWide2$value
# Tile panel: complete() inserts missing visits so every patient strip spans
# the full visit axis; missing tiles are fully transparent (alpha NA -> 0).
g1<-plotDatWide %>%
group_by(assay) %>%
complete(PCR.Visit) %>%
ggplot(mapping=aes(y=assay,x=PCR.Visit,alpha=value,fill=assay)) +
geom_tile(aes(alpha = value), color = "white") +
scale_fill_manual(values=c("steelblue","orange"),drop=FALSE,na.value="gray") +
coord_equal() +
scale_alpha(na.value=0) +
geom_text(mapping=aes(x=PCR.Visit,y=assay,label=positivity,alpha=1)) +
theme(legend.position="none",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
plot.margin = unit(c(0.1,0.1,0.1,0), "pt"),
axis.line=element_blank(),
axis.text.x=element_blank(),
axis.text.y=element_blank(),
axis.ticks=element_blank(),
axis.title.x=element_blank(),
axis.title.y=element_blank()) +
xlab(xlab) +
ylab(ylab)
# Annotation panel: patient ID, treatment arm and the two row labels, laid out
# as invisible tiles with text so it aligns with the tile panel.
datAnnot<-data.frame(
x=rep(c(3,7,10),2),
y=c(rep(2,3),rep(1,3)),
label=c(pid,unique(plotDat$PCR.treatment[plotDat$PCR.PATID==pid & !is.na(plotDat$PCR.treatment)]),"qPCR","","","ELISA")
)
g2<-datAnnot %>%
ggplot(mapping=aes(y=y,x=x)) +
geom_tile(color = "white",alpha=0) +
geom_text(mapping=aes(label=label)) +
theme(legend.position="none",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
plot.margin = unit(c(0.1,0,0.1,0.1), "pt"),
panel.background = element_blank(),
axis.line=element_blank(),
axis.text.x=element_blank(),
axis.text.y=element_blank(),
axis.ticks=element_blank(),
axis.title.x=element_blank(),
axis.title.y=element_blank()) +
coord_equal()
# Combine annotation + tiles side by side.
g<-plot_grid(g2,g1, align = "h", nrow = 1, rel_heights = rep(1,2))
return(g)
}
## Per-patient time-course heatmaps, qPCR positivity threshold Ct = 35.
g <- list()
## CFZ arm: relabel patients as CFZ01, CFZ02, ... (zero-padded rank of PATID).
datTmp <- dat %>%
  filter(PCR.treatment == "CFZ")
datTmp$PCR.PATID <- factor(paste(sep = "", "CFZ", formatC(width = 2, format = "d", flag = "0", as.integer(factor(datTmp$PCR.PATID)))))
# Iterate over the factor levels directly (avoids the 1:length(...) idiom).
for (pid in levels(datTmp$PCR.PATID)) {
  g[[pid]] <- plotqPCRELISATimeHeatmap(datTmp, pid, ctThr = 35)
}
## Placebo arm: use the pre-assigned anonymised dummy patient IDs.
datTmp <- dat %>%
  filter(PCR.treatment == "Placebo")
#datTmp$PCR.PATID<-factor(paste(sep="","PCB",formatC(width = 2, format = "d", flag = "0",as.integer(factor(datTmp$PCR.PATID)))))
datTmp$PCR.PATID <- factor(datTmp$dummyPID)
for (pid in levels(datTmp$PCR.PATID)) {
  g[[pid]] <- plotqPCRELISATimeHeatmap(datTmp, pid, ctThr = 35)
}
# Show inline and write Supplementary Figure 1 (Ct 35) to disk.
grid.arrange(grobs = g, nrow = 12, as.table = F, top = textGrob("Threshold Ct = 35", gp = gpar(fontsize = 18)))
png(file = paste(sep = "", outPrefix, "_SupFig1_HeatMap_Ct35.png"), height = 12, width = 12, units = "in", res = 450)
grid.arrange(grobs = g, nrow = 12, as.table = F, top = textGrob("Threshold Ct = 35", gp = gpar(fontsize = 18)))
dev.off()
## quartz_off_screen
## 2
## Per-patient time-course heatmaps, qPCR positivity threshold Ct = 32.
g <- list()
## CFZ arm: relabel patients as CFZ01, CFZ02, ... (zero-padded rank of PATID).
datTmp <- dat %>%
  filter(PCR.treatment == "CFZ")
datTmp$PCR.PATID <- factor(paste(sep = "", "CFZ", formatC(width = 2, format = "d", flag = "0", as.integer(factor(datTmp$PCR.PATID)))))
# Iterate over the factor levels directly (avoids the 1:length(...) idiom).
for (pid in levels(datTmp$PCR.PATID)) {
  g[[pid]] <- plotqPCRELISATimeHeatmap(datTmp, pid, ctThr = 32)
}
## Placebo arm: use the pre-assigned anonymised dummy patient IDs.
datTmp <- dat %>%
  filter(PCR.treatment == "Placebo")
#datTmp$PCR.PATID<-factor(paste(sep="","PCB",formatC(width = 2, format = "d", flag = "0",as.integer(factor(datTmp$PCR.PATID)))))
datTmp$PCR.PATID <- factor(datTmp$dummyPID)
for (pid in levels(datTmp$PCR.PATID)) {
  g[[pid]] <- plotqPCRELISATimeHeatmap(datTmp, pid, ctThr = 32)
}
# Show inline and write Supplementary Figure 1 (Ct 32) to disk.
grid.arrange(grobs = g, nrow = 12, as.table = F, top = textGrob("Threshold Ct = 32", gp = gpar(fontsize = 18)))
png(file = paste(sep = "", outPrefix, "_SupFig1_HeatMap_Ct32.png"), height = 12, width = 12, units = "in", res = 450)
grid.arrange(grobs = g, nrow = 12, as.table = F, top = textGrob("Threshold Ct = 32", gp = gpar(fontsize = 18)))
dev.off()
## quartz_off_screen
## 2
substantialDelta<-function(m1,m2,type="sd",minChange=0.1,sdVal=1){
  # Classify whether the change from measurement m1 to m2 is "substantial".
  #
  # m1, m2    = measurements at day i and i+1 that are compared
  # type      = one of 'sd' (need an absolute change of at least minChange*sdVal)
  #             or 'rel' (need a relative change of minChange from m1 to m2)
  # minChange = the minimum (positive or negative) difference needed to consider
  #             that m2 is meaningfully different from m1; if type=="sd", m2-m1
  #             expressed as a multiple of the standard deviation; if type=="rel",
  #             the proportional change of m2 compared to m1
  # sdVal     = standard deviation value to be used when type=="sd"
  #
  # Returns +1 (substantial increase), -1 (substantial decrease), 0 (no
  # substantial change). m1 and m2 are expected to be non-missing scalars.
  deltaAbs<-m2-m1
  if(m1!=0){
    deltaRel<-(m2-m1)/m1
  }else{
    # Avoid division by zero: no change -> 0, otherwise signed infinity so any
    # positive minChange flags the change as substantial.
    if(m2==0){deltaRel<-0}else{deltaRel<-Inf*sign(deltaAbs)}
  }
  res<-0
  # Scalar condition: use short-circuiting &&/|| rather than vectorised &/|.
  if((type=="sd" && abs(deltaAbs)>=minChange*sdVal) || (type=="rel" && abs(deltaRel)>=minChange)){
    res<-1*sign(deltaAbs)
  }
  return(res)
}
concordFun<-function(dat){
# For every patient and every evaluable pair of successive visits, classify the
# joint change in qPCR (log2 oocyst count) and ELISA (normalised OD) as
# concordant or discordant, under two definitions of "substantial change"
# ('sd' and 'rel'; see substantialDelta()).
#
# dat = merged data with columns dummyPID, PCR.Visit, ELISA.Visit,
#       PCR.log2_first, ELISA.OD.norm (one row per sample).
# Returns list(sd=..., rel=...): one concordance code per evaluable visit pair.
pids<-unique(dat$dummyPID)
visits<-c(-1,1:8)
# Cohort-wide SDs used by the 'sd' definition of substantial change.
sdPCR<-sd(dat$PCR.log2_first,na.rm=T)
sdELISA<-sd(dat$ELISA.OD.norm,na.rm=T)
resSd<-numeric(0)
resRel<-numeric(0)
# +2 == change in both, same direction
# +1 == change PCR, no change ELISA
# 0 == no change in either
# -1 == no change PCR, change ELISA
# -2 == both change, opposite direction
for(p in pids){
datTmp<-dat %>% filter(dummyPID==p & !is.na(PCR.Visit) & !is.na(ELISA.Visit))
cur<-visits[1]
for(i in 2:length(visits)){
v<-visits[i]
# A pair is evaluable only if there is exactly one PCR and one ELISA record at
# both the reference visit 'cur' and the current visit 'v' ...
if(sum(datTmp$PCR.Visit==cur)==1 & sum(datTmp$ELISA.Visit==cur)==1 & sum(datTmp$PCR.Visit==v)==1 & sum(datTmp$ELISA.Visit==v)==1){
# ... and all four measurements are non-missing.
if(!is.na(datTmp$PCR.log2_first[datTmp$PCR.Visit==cur]) & !is.na(datTmp$ELISA.OD.norm[datTmp$ELISA.Visit==cur]) & !is.na(datTmp$PCR.log2_first[datTmp$PCR.Visit==v]) & !is.na(datTmp$ELISA.OD.norm[datTmp$ELISA.Visit==v])){
# deltaPCR<-datTmp$PCR.log2_first[datTmp$PCR.Visit==v]-datTmp$PCR.log2_first[datTmp$PCR.Visit==cur]
# deltaELISA<-datTmp$ELISA.OD.norm[datTmp$ELISA.Visit==v]-datTmp$ELISA.OD.norm[datTmp$ELISA.Visit==cur]
#
# relDeltaPCR<-deltaPCR/datTmp$PCR.log2_first[datTmp$PCR.Visit==cur]-1
# relDeltaELISA<-deltaELISA/datTmp$ELISA.OD.norm[datTmp$ELISA.Visit==cur]-1
#
# deltaPCR<-deltaPCR/sdPCR
# deltaELISA<-deltaELISA/sdELISA
# Direction of substantial change (-1/0/+1) for each assay, per definition.
subChangePCRSd<-substantialDelta(m1=datTmp$PCR.log2_first[datTmp$PCR.Visit==cur],m2=datTmp$PCR.log2_first[datTmp$PCR.Visit==v],type="sd",sdVal=sdPCR,minChange=0.1)
subChangePCRRel<-substantialDelta(m1=datTmp$PCR.log2_first[datTmp$PCR.Visit==cur],m2=datTmp$PCR.log2_first[datTmp$PCR.Visit==v],type="rel",minChange=0.1)
subChangeELISASd<-substantialDelta(m1=datTmp$ELISA.OD.norm[datTmp$ELISA.Visit==cur],m2=datTmp$ELISA.OD.norm[datTmp$ELISA.Visit==v],type="sd",sdVal=sdELISA,minChange=0.1)
subChangeELISARel<-substantialDelta(m1=datTmp$ELISA.OD.norm[datTmp$ELISA.Visit==cur],m2=datTmp$ELISA.OD.norm[datTmp$ELISA.Visit==v],type="rel",minChange=0.1)
# Map the two change indicators onto the concordance codes above ('sd').
if((subChangePCRSd==1 & subChangeELISASd==1) | (subChangePCRSd==-1 & subChangeELISASd==-1)){
resSdTmp<-2
}else if((subChangePCRSd==1 & subChangeELISASd==-1) | (subChangePCRSd==-1 & subChangeELISASd==1)){
resSdTmp<-(-2)
}else if(subChangePCRSd==0 & subChangeELISASd==0){
resSdTmp<-0
}else if(abs(subChangePCRSd)==1 & subChangeELISASd==0){
resSdTmp<-1
}else if(subChangePCRSd==0 & abs(subChangeELISASd)==1){
resSdTmp<-(-1)
}
# Same mapping for the relative-change definition.
if((subChangePCRRel==1 & subChangeELISARel==1) | (subChangePCRRel==-1 & subChangeELISARel==-1)){
resRelTmp<-2
}else if((subChangePCRRel==1 & subChangeELISARel==-1) | (subChangePCRRel==-1 & subChangeELISARel==1)){
resRelTmp<-(-2)
}else if(subChangePCRRel==0 & subChangeELISARel==0){
resRelTmp<-0
}else if(abs(subChangePCRRel)==1 & subChangeELISARel==0){
resRelTmp<-1
}else if(subChangePCRRel==0 & abs(subChangeELISARel)==1){
resRelTmp<-(-1)
}
resSd<-c(resSd,resSdTmp)
resRel<-c(resRel,resRelTmp)
# Advance the reference visit only when the pair was evaluable, so a
# comparison may span skipped/unevaluable visits.
cur<-v
}
}
}
}
return(list(sd=resSd,rel=resRel))
}
# Observed concordance codes between successive-visit changes in qPCR and ELISA
# (codes: +2 both same direction; +1 only PCR; 0 neither; -1 only ELISA;
# -2 opposite directions).
res<-concordFun(dat)
resSd<-res$sd
resRel<-res$rel
print(table(resSd))
## resSd
## -2 -1 0 1 2
## 13 3 10 63 30
print(table(resRel))
## resRel
## -2 -1 0 1 2
## 26 33 11 10 39
## Permutation test for the concordance: within each missingness stratum the
## PCR and ELISA values are permuted independently, breaking any within-sample
## association while preserving the missingness pattern.
N<-1e3
pRanSd<-rep(NA,N)
pRanRel<-rep(NA,N)
## The missingness strata and the observed concordant counts do not depend on
## the permutation -- compute them once, outside the loop.
idxAllNA<-which( is.na(dat$PCR.log2_first) & is.na(dat$ELISA.OD.norm) )
idxComplete<-which( !is.na(dat$PCR.log2_first) & !is.na(dat$ELISA.OD.norm) )
idxPcrNA<-which( is.na(dat$PCR.log2_first) & !is.na(dat$ELISA.OD.norm) )
idxElisaNA<-which( !is.na(dat$PCR.log2_first) & is.na(dat$ELISA.OD.norm) )
idx<-c(idxComplete,idxPcrNA,idxElisaNA,idxAllNA)
obsConcordSd<-sum(resSd==0 | resSd==2)
obsConcordRel<-sum(resRel==0 | resRel==2)
for(j in seq_len(N)){
  # randomise: two independent permutations per stratum (one for PCR, one for ELISA)
  idxCompleteRan1<-sample(x=idxComplete,size=length(idxComplete),replace=F)
  idxPcrNARan1<-sample(x=idxPcrNA,size=length(idxPcrNA),replace=F)
  idxElisaNARan1<-sample(x=idxElisaNA,size=length(idxElisaNA),replace=F)
  idxCompleteRan2<-sample(x=idxComplete,size=length(idxComplete),replace=F)
  idxPcrNARan2<-sample(x=idxPcrNA,size=length(idxPcrNA),replace=F)
  idxElisaNARan2<-sample(x=idxElisaNA,size=length(idxElisaNA),replace=F)
  idxRan1<-c(idxCompleteRan1,idxPcrNARan1,idxElisaNARan1,idxAllNA)
  idxRan2<-c(idxCompleteRan2,idxPcrNARan2,idxElisaNARan2,idxAllNA)
  datTmp<-dat
  datTmp$PCR.log2_first[idx]<-dat$PCR.log2_first[idxRan1]
  datTmp$ELISA.OD.norm[idx]<-dat$ELISA.OD.norm[idxRan2]
  # compute the concordance codes on the permuted data
  res<-concordFun(dat=datTmp)
  resSdTmp<-sum(res$sd==0 | res$sd==2)
  resRelTmp<-sum(res$rel==0 | res$rel==2)
  # check: is the permuted concordance at least as large as the observed one?
  pRanSd[j]<-as.numeric(resSdTmp>=obsConcordSd)
  pRanRel[j]<-as.numeric(resRelTmp>=obsConcordRel)
}
We will use 2 definitions of meaningful or substantial change:
The two tabulations below list how many times we see different types of concordant or discordant pairs of successive measurements:
Concordant codes:
Discordant codes:
# One-sided permutation p-values: proportion of permutations at least as
# concordant as the observed data, under each change definition.
pRandSd<-sum(pRanSd)/N
pRandRel<-sum(pRanRel)/N
There are 106 substantial changes in PCR log2 oocyst count and 46 substantial changes in ELISA normalised OD.
Of the 109 instances where at least one of PCR log2 oocyst count or ELISA normalised OD changed substantially, 30 instances were in the same direction (27.5%). We would expect 17.2% by random chance alone.
Of all 119 pairs of successive measurements that were considered, 40 were concordant (no change or change in the same direction; 33.6%, p-value for this happening by chance: 0.078).
There are 75 substantial changes in PCR log2 oocyst count and 98 substantial changes in ELISA normalised OD.
Of the 108 instances where at least one of PCR log2 oocyst count or ELISA normalised OD changed substantially, 39 instances were in the same direction (36.1%). We would expect 26% by random chance alone.
Of all 119 pairs of successive measurements that were considered, 50 were concordant (no change or change in the same direction; 42%, p-value for this happening by chance: 0.165).
# Q: use multiple imputation??
## prepare data: complete cases only (rows with both PCR and ELISA measurements)
datTmp<-dat[,c("PCR.PATID","PCR.log2_first","ELISA.OD.norm")]
datTmp<-datTmp[rowSums(is.na(datTmp))==0,]
## fit model: natural cubic spline (3 interior knots -> df=4) for the fixed
## effect of log2 oocyst count, with a per-patient random intercept
modMixSpline<-lmer(ELISA.OD.norm ~ 1 + ns(PCR.log2_first,knots=c(12,17,22)) + (1|PCR.PATID), data=datTmp, REML = TRUE)
#modMixSpline<-lme(ELISA.OD.norm ~ 1 + ns(PCR.log2_first,df=4),random=~1|PCR.PATID, data=datTmp, method = "REML")
#
# fn<-function(pars){
# modTmp<-try(lmer(ELISA.OD.norm ~ 1 + ns(PCR.log2_first,knots=pars) + (1|PCR.PATID), data=datTmp, REML = TRUE))
# if(is.character(modTmp)){
# return(1e6)
# }else{
# return(-as.numeric(summary(modTmp)$logLik))
# }
# }
#
# knotsOpt<-optim(par=c(12,17,22),fn=fn)
## fit null model (intercept + random intercept only; no spline term)
modMixNull<-lmer(ELISA.OD.norm ~ 1 + (1|PCR.PATID), data=datTmp, REML = TRUE)
## do likelihood ratio test (models refitted with ML, as required for the LRT)
modMixSplineML<-lmer(ELISA.OD.norm ~ 1 + ns(PCR.log2_first,knots=c(12,17,22)) + (1|PCR.PATID), data=datTmp, REML = FALSE)
modMixNullML<-lmer(ELISA.OD.norm ~ 1 + (1|PCR.PATID), data=datTmp, REML = FALSE)
lrtPval<-signif(digits=3,anova(modMixSplineML,modMixNullML)$`Pr(>Chisq)`[2])
## plot: population-level (fixed-effects) prediction over a grid of oocyst counts
newdat <- data.frame(PCR.log2_first=seq(0,25,length=100),ELISA.OD.norm=NA)
newdat$ELISA.OD.norm <- predict(modMixSpline,newdat,re.form=NA)
mm <- model.matrix(terms(modMixSpline),newdat)
# http://bbolker.github.io/mixedmodels-misc/glmmFAQ.html#predictions-andor-confidence-or-prediction-intervals-on-predictions
# https://stat.ethz.ch/pipermail/r-sig-mixed-models/2010q2/003585.html
#
# pvar1: prediction variance from fixed-effects uncertainty only;
# tvar1: adds the random-intercept variance on top.
pvar1 <- diag(mm %*% tcrossprod(vcov(modMixSpline),mm))
tvar1 <- pvar1+VarCorr(modMixSpline)$PCR.PATID[1] ## must be adapted for more complex models
cmult <- 1.96
# plo/phi: fixed-effects-only band; tlo/thi: band including random-effects variance.
newdat <- data.frame(
newdat,
plo = newdat$ELISA.OD.norm-cmult*sqrt(pvar1),
phi = newdat$ELISA.OD.norm+cmult*sqrt(pvar1),
tlo = newdat$ELISA.OD.norm-cmult*sqrt(tvar1),
thi = newdat$ELISA.OD.norm+cmult*sqrt(tvar1)
)
# Fitted curve + tlo/thi ribbon, with the raw per-patient trajectories overlaid.
g<-ggplot(data=newdat,mapping=aes(x=PCR.log2_first,y=ELISA.OD.norm)) +
geom_line(col="black",lwd=2) +
geom_ribbon(aes(ymin=tlo, ymax=thi),col=NA,alpha=0.3,fill="darkgrey") +
geom_point(aes(x=PCR.log2_first,y=ELISA.OD.norm,col=PCR.treatment,group=PCR.PATID),data=dat,cex=3) +
geom_line(aes(x=PCR.log2_first,y=ELISA.OD.norm,col=PCR.treatment,group=PCR.PATID),lwd=0.2,lty=2,data=dat[!is.na(dat$PCR.log2_first) & !is.na(dat$ELISA.OD.norm),]) +
scale_color_manual(values=c("steelblue","orange"),labels=c("CFZ","Placebo")) +
labs(title=paste(sep="\n","Mixed effects, natural cubic spline (df=4; 3 knots) model.","Confidence band based on fixed-effects uncertainty + random-effects variance.",paste(sep="","Likelihood ratio test p-value for association: ",lrtPval,"."))) +
xlab("log2 oocyst count per gram") +
ylab("ELISA normalised OD") +
theme(text=element_text(size=18))
print(g)
# NOTE(review): unlike the other figures this writes to the working directory
# instead of outPrefix -- confirm whether this is intended.
png(width=16,height=9,res=300,units="in",file="cubicSplineFit.png")
print(g)
dev.off()
## quartz_off_screen
## 2
## fit model (linear spline): piecewise-linear fixed effect with one knot at 10,
## per-patient random intercept
modMixSpline<-lmer(ELISA.OD.norm ~ 1 + bs(PCR.log2_first,knots=c(10),degree=1) + (1|PCR.PATID), data=datTmp, REML = TRUE)
# fn<-function(pars){
# modTmp<-try(lmer(ELISA.OD.norm ~ 1 + bs(PCR.log2_first,knots=pars,degree=1) + (1|PCR.PATID), data=datTmp, REML = TRUE))
# if(is.character(modTmp)){
# return(1e6)
# }else{
# return(-as.numeric(summary(modTmp)$logLik))
# }
# }
#
# knotsOpt<-optim(par=c(10),fn=fn,method="Brent",lower=0,upper=22)
## do likelihood ratio test (ML refit vs the intercept-only null from above)
modMixSplineML<-lmer(ELISA.OD.norm ~ 1 + bs(PCR.log2_first,knots=c(10),degree=1) + (1|PCR.PATID), data=datTmp, REML = FALSE)
lrtPval<-signif(digits=3,anova(modMixSplineML,modMixNullML)$`Pr(>Chisq)`[2])
## plot: population-level prediction over a grid of oocyst counts
newdat <- data.frame(PCR.log2_first=seq(0,25,length=100),ELISA.OD.norm=NA)
newdat$ELISA.OD.norm <- predict(modMixSpline,newdat,re.form=NA)
mm <- model.matrix(terms(modMixSpline),newdat)
# http://bbolker.github.io/mixedmodels-misc/glmmFAQ.html#predictions-andor-confidence-or-prediction-intervals-on-predictions
# https://stat.ethz.ch/pipermail/r-sig-mixed-models/2010q2/003585.html
#
# pvar1: fixed-effects prediction variance; tvar1 adds the random-intercept variance.
pvar1 <- diag(mm %*% tcrossprod(vcov(modMixSpline),mm))
tvar1 <- pvar1+VarCorr(modMixSpline)$PCR.PATID[1] ## must be adapted for more complex models
cmult <- 1.96
newdat <- data.frame(
newdat,
plo = newdat$ELISA.OD.norm-cmult*sqrt(pvar1),
phi = newdat$ELISA.OD.norm+cmult*sqrt(pvar1),
tlo = newdat$ELISA.OD.norm-cmult*sqrt(tvar1),
thi = newdat$ELISA.OD.norm+cmult*sqrt(tvar1)
)
g<-ggplot(data=newdat,mapping=aes(x=PCR.log2_first,y=ELISA.OD.norm)) +
geom_line(col="black",lwd=2) +
geom_ribbon(aes(ymin=tlo, ymax=thi),col=NA,alpha=0.3,fill="darkgrey") +
geom_point(aes(x=PCR.log2_first,y=ELISA.OD.norm,col=PCR.treatment,group=PCR.PATID),data=dat,cex=3) +
geom_line(aes(x=PCR.log2_first,y=ELISA.OD.norm,col=PCR.treatment,group=PCR.PATID),lwd=0.2,lty=2,data=dat[!is.na(dat$PCR.log2_first) & !is.na(dat$ELISA.OD.norm),]) +
scale_color_manual(values=c("steelblue","orange"),labels=c("CFZ","Placebo")) +
labs(title=paste(sep="\n","Mixed effects, linear spline (df=2; 1 knot) model.","Confidence band based on fixed-effects uncertainty + random-effects variance.",paste(sep="","Likelihood ratio test p-value for association: ",lrtPval,"."))) +
xlab("log2 oocyst count per gram") +
ylab("ELISA normalised OD") +
theme(text=element_text(size=18))
print(g)
# NOTE(review): writes to the working directory instead of outPrefix -- confirm.
png(width=16,height=9,res=300,units="in",file="linearSplineFit.png")
print(g)
dev.off()
## quartz_off_screen
## 2
Note:
This is using the PCR data from the first stool of the day. Please confirm that this is also what the ELISA was run on.
Update: yes, confirmed.
\[\,\]
# repeated measures correlation (PCR log2-oocyst per gram and ELISA normalised OD)
dat$PCR.PATID<-factor(dat$PCR.PATID)
# Restrict to rows with qPCR Ct < 35 (qPCR-positive samples).
dat2<-dat[dat$PCRCT.Ct<35,]
# Spearman-style version: repeated-measures correlation on the global ranks.
dat2$PCR.log2_first_rank<-rank(dat2$PCR.log2_first)
dat2$ELISA.OD.norm_rank<-rank(dat2$ELISA.OD.norm)
corPearsonRM<-rmcorr(participant="PCR.PATID",measure1="PCR.log2_first",measure2="ELISA.OD.norm",dataset=dat2)
corSpearmanRM<-rmcorr(participant="PCR.PATID",measure1="PCR.log2_first_rank",measure2="ELISA.OD.norm_rank",dataset=dat2)
Pearson correlation coefficient (repeated measures): 0.3753 (95% CI = [0.2014,0.5262], p = 0.000049).
Spearman correlation coefficient (rank-based / non-parametric, repeated measures): 0.1264 (95% CI = [-0.0374,0.2836], p = 0.127104).
\[\,\]
# repeated measures correlation (PCR log2-oocyst per gram and ELISA normalised OD)
dat$PCR.PATID<-factor(dat$PCR.PATID)
# As above, but additionally restrict to strictly positive normalised OD.
dat2<-dat[dat$PCRCT.Ct<35 & dat$ELISA.OD.norm>0,]
dat2$PCR.log2_first_rank<-rank(dat2$PCR.log2_first)
dat2$ELISA.OD.norm_rank<-rank(dat2$ELISA.OD.norm)
corPearsonRM<-rmcorr(participant="PCR.PATID",measure1="PCR.log2_first",measure2="ELISA.OD.norm",dataset=dat2)
corSpearmanRM<-rmcorr(participant="PCR.PATID",measure1="PCR.log2_first_rank",measure2="ELISA.OD.norm_rank",dataset=dat2)
Pearson correlation coefficient (repeated measures): 0.3788 (95% CI = [0.1446,0.5728], p = 0.001861).
Spearman correlation coefficient (rank-based / non-parametric, repeated measures): 0.3854 (95% CI = [0.1522,0.578], p = 0.001522).
Note that what looks like a single point at log2 oocyst per gram = 9.86, Ct=40 is in fact 4 samples:
CFZ0010025.Visit7, CFZ0010147.Visit4, CFZ0010147.Visit6, CFZ0010179.Visit6.
# Scatter of qPCR Ct vs log2 oocyst count (rows with Ct < 35 only), coloured by
# qPCR experiment to make potential batch effects visible.
dat %>%
filter(PCRCT.Ct<35) %>%
ggplot(mapping=aes(x=PCR.log2_first,y=PCRCT.Ct,col=PCRCT.Experiment)) +
geom_point(size=2) +
#geom_smooth(se=F) +
#geom_text(data=dat[dat$PCRCT.Ct>35 | (dat$PCRCT.Ct>30 & dat$PCR.log2_first>15) | (dat$PCRCT.Ct<29 & dat$PCR.log2_first<11.5),],mapping=aes(x=PCR.log2_first,y=PCRCT.Ct,label=PCR.Sample.ID),nudge_x=0,nudge_y=0.35) +
xlab("log2 oocyst per gram") +
ylab("qPCR Ct value") +
ggtitle("qPCR CT vs qPCR log2 oocyst per gram (first stool of the day).") +
scale_color_manual(values=c("steelblue","salmon","orange","cyan","mediumorchid","darkgrey","brown","darkgreen","greenyellow","black"),name="PCR experiment") +
theme(text=element_text(size=16))
These are all the potentially problematic samples:
# Samples with Ct > 35 (high Ct despite a recorded oocyst count), sorted by Ct.
datTmp<-dat[!is.na(dat$PCRCT.Ct) & dat$PCRCT.Ct>35,c("PCR.Sample.ID","PCRCT.Lab.Sample.ID","PCR.log2_first","PCRCT.Ct")]
datTmp<-datTmp[order(datTmp$PCRCT.Ct),]
kable(datTmp,row.names=F) %>%
kable_styling()
| PCR.Sample.ID | PCRCT.Lab.Sample.ID | PCR.log2_first | PCRCT.Ct |
|---|---|---|---|
| CFZ0011548.Visit7 | CDL127F1 | 8.17047 | 35.260 |
| CFZ0011019.Visit3 | CDH131F1 | 7.38882 | 35.414 |
| CFZ0010010.Visit5 | CDJ102F1 | 8.44481 | 35.545 |
| CFZ0010147.Visit5 | CDJ117F1 | 5.61746 | 36.206 |
| CFZ0010147.Visit7 | CDL10MF1 | 5.50515 | 36.320 |
| CFZ0010147.Visit1 | CDA10XF1 | 5.05775 | 36.776 |
| CFZ0011086.Visit8 | CDM14WF1 | 8.31257 | 37.091 |
| CFZ0010293.Visit8 | CDM10XF1 | 3.81738 | 38.003 |
| CFZ0010025.Visit7 | CDL108F1 | 9.85720 | 40.000 |
| CFZ0010147.Visit4 | CDI10NF1 | 9.85720 | 40.000 |
| CFZ0010147.Visit6 | CDE10PF1 | 9.85720 | 40.000 |
| CFZ0010179.Visit6 | CDE11EF1 | 9.85720 | 40.000 |
compSensSpec<-function(dat,timeVar,goldStdVar,diagVar,goldStdThreshold,nEval=1000,logScale=F,logScaleOffSet=0){
  # Time-stratified, weighted sensitivity / specificity / PPV / NPV of a
  # continuous diagnostic (diagVar) over a grid of decision thresholds, taking
  # goldStdVar <= goldStdThreshold as gold-standard positive (Ct logic: lower
  # values mean more material detected).
  #
  # dat              = data.frame with one row per sample
  # timeVar          = column name of the (visit) time variable; metrics are
  #                    computed per time point and combined with weights
  #                    proportional to each time point's share of the relevant
  #                    denominator (positives, negatives, called pos/neg)
  # goldStdVar       = column name of the gold standard measurement
  # diagVar          = column name of the evaluated diagnostic measurement
  # goldStdThreshold = gold standard positive iff goldStdVar <= this value
  # nEval            = number of decision thresholds across the range of diagVar
  # logScale         = if TRUE, thresholds are log-spaced over the range
  # logScaleOffSet   = offset added before taking logs (handles zeros)
  #
  # Returns a data.frame with one row per threshold:
  # threshold, sensitivity, specificity, PPV, NPV, YoudenJ.
  times<-unique(dat[,timeVar])
  res<-data.frame(threshold=rep(NA,nEval),sensitivity=rep(NA,nEval),specificity=rep(NA,nEval),PPV=rep(NA,nEval),NPV=rep(NA,nEval),YoudenJ=NA)
  # Only rows with both gold standard and diagnostic observed are evaluable.
  dat<-dat[!is.na(dat[,goldStdVar]) & !is.na(dat[,diagVar]),]
  rangeDiag<-range(dat[,diagVar])
  if(logScale){
    diagThr<-exp(seq(log(rangeDiag[1]+logScaleOffSet),log(rangeDiag[2]+logScaleOffSet),length=nEval))-logScaleOffSet
  }else{
    diagThr<-seq(rangeDiag[1],rangeDiag[2],length=nEval)
  }
  res$threshold<-diagThr
  # Gold standard call: positive (1) iff measurement <= threshold.
  dat$goldStd<-ifelse(dat[,goldStdVar]<=goldStdThreshold,1,0)
  sensMat<-matrix(nrow=nEval,ncol=length(times))
  specMat<-sensMat
  ppvMat<-sensMat
  npvMat<-sensMat
  sensWeights<-sensMat
  specWeights<-sensMat
  ppvWeights<-sensMat
  npvWeights<-sensMat
  totPos<-sum(dat$goldStd==1)
  totNeg<-sum(dat$goldStd==0)
  for(i in seq_len(nEval)){
    for(j in seq_along(times)){
      tmpGS<-dat$goldStd[dat[,timeVar]==times[j]]
      tmpDiag<-dat[dat[,timeVar]==times[j],diagVar]
      # Diagnostic call at threshold i: positive iff value >= threshold.
      tmpDiag<-ifelse(tmpDiag>=diagThr[i],1,0)
      sensMat[i,j]<-sum(tmpGS==1 & tmpDiag==1)/sum(tmpGS==1)
      specMat[i,j]<-sum(tmpGS==0 & tmpDiag==0)/sum(tmpGS==0)
      ppvMat[i,j]<-sum(tmpGS==1 & tmpDiag==1)/sum(tmpDiag==1)
      npvMat[i,j]<-sum(tmpGS==0 & tmpDiag==0)/sum(tmpDiag==0)
      sensWeights[i,j]<-sum(tmpGS==1)/totPos
      specWeights[i,j]<-sum(tmpGS==0)/totNeg
      ppvWeights[i,j]<-sum(tmpDiag==1)/sum(dat[,diagVar]>=diagThr[i])
      npvWeights[i,j]<-sum(tmpDiag==0)/sum(dat[,diagVar]<diagThr[i])
    }
    # Time points with an empty denominator give NaN (0/0); exclude them from
    # the weighted sums. Previously only sensitivity/specificity excluded NaN,
    # so a single empty stratum could propagate NaN into PPV/NPV; all four
    # metrics are now handled consistently.
    res$sensitivity[i]<-sum((sensWeights[i,]*sensMat[i,])[!is.nan(sensMat[i,])])
    res$specificity[i]<-sum((specWeights[i,]*specMat[i,])[!is.nan(specMat[i,])])
    res$PPV[i]<-sum((ppvWeights[i,]*ppvMat[i,])[!is.nan(ppvMat[i,])])
    res$NPV[i]<-sum((npvWeights[i,]*npvMat[i,])[!is.nan(npvMat[i,])])
  }
  res$YoudenJ<-res$sensitivity+res$specificity-1
  return(res)
}
# Sensitivity/specificity over log-spaced ELISA OD thresholds, with qPCR
# (Ct <= 35 resp. 32) as gold standard. Visits 3 and 5 are excluded
# (presumably no paired measurements at those visits -- TODO confirm).
sensSpec35<-compSensSpec(dat=dat[!is.element(el=dat$PCR.Visit,set=c(3,5)),],timeVar="PCR.Visit",goldStdVar="PCRCT.Ct",diagVar="ELISA.OD.norm",goldStdThreshold=35,nEval=1000,logScale=T,logScaleOffSet=0.001)
sensSpec32<-compSensSpec(dat=dat[!is.element(el=dat$PCR.Visit,set=c(3,5)),],timeVar="PCR.Visit",goldStdVar="PCRCT.Ct",diagVar="ELISA.OD.norm",goldStdThreshold=32,nEval=1000,logScale=T,logScaleOffSet=0.001)
#sensSpec35<-compSensSpec(dat=dat[!is.element(el=dat$PCR.Visit,set=c(3,5)),],timeVar="PCR.Visit",goldStdVar="PCRCT.Ct",diagVar="ELISA.OD.norm",goldStdThreshold=35,nEval=5000) # cannot do this: almost no negative samples according to PCR --> lots of divisions by 0
# Trapezoidal area under the empirical ROC curve (gold standard: Ct <= 35).
sens<-sensSpec35$sensitivity
dsens<-c(diff(sensSpec35$sensitivity),0)
dspec<-abs(c(diff(1-sensSpec35$specificity),0))
auc<-sum(sens*dspec) + sum(dsens*dspec)/2
# Operating point maximising Youden's J; ties summarised by the median threshold.
maxJ<-max(sensSpec35$YoudenJ,na.rm=T)
tmpSensSpec<-sensSpec35[sensSpec35$YoudenJ==maxJ,]
bestSens<-unique(tmpSensSpec$sensitivity)
bestSpec<-unique(tmpSensSpec$specificity)
bestOD<-median(tmpSensSpec$threshold)
# ROC curve with the maximum-J operating point marked in orange.
g<-ggplot(data=as.data.frame(sensSpec35),mapping=aes(x=1-specificity,y=sensitivity)) +
geom_path(lwd=1.75,col="steelblue") +
geom_point(size=1.75) +
geom_abline(intercept=0,slope=1,lty=2,col="darkgrey",lwd=1) +
ggtitle(paste("ROC curve for normalised ELISA OD vs. PCR as gold standard with threshold\nCt = 35 (AUC = ",round(auc,digits=2),").",sep="")) +
theme_light() +
theme(text=element_text(size=18)) +
geom_hline(mapping=aes(yintercept=bestSens),lwd=1.2,lty=3,col="orange") +
geom_vline(mapping=aes(xintercept=1-bestSpec),lwd=1.2,lty=3,col="orange") +
geom_text(mapping=aes(x=0.033,y=0.5,label=paste(sep="","J = ",format(nsmall=2,round(digits=2,maxJ)))),col="orange",size=7)
print(g)
png(file=paste(sep="",outPrefix,"_Fig2_ROC_Ct35.png"),height=12,width=12,units="in",res=450)
print(g)
dev.off()
## quartz_off_screen
## 2
# Trapezoidal area under the empirical ROC curve (gold standard: Ct <= 32).
sens<-sensSpec32$sensitivity
dsens<-c(diff(sensSpec32$sensitivity),0)
dspec<-abs(c(diff(1-sensSpec32$specificity),0))
auc<-sum(sens*dspec) + sum(dsens*dspec)/2
# Operating point maximising Youden's J; ties summarised by the median threshold.
maxJ<-max(sensSpec32$YoudenJ,na.rm=T)
tmpSensSpec<-sensSpec32[sensSpec32$YoudenJ==maxJ,]
bestSens<-unique(tmpSensSpec$sensitivity)
bestSpec<-unique(tmpSensSpec$specificity)
bestOD<-median(tmpSensSpec$threshold)
# ROC curve with the maximum-J operating point marked in orange.
g<-ggplot(data=as.data.frame(sensSpec32),mapping=aes(x=1-specificity,y=sensitivity)) +
geom_path(lwd=1.75,col="steelblue") +
geom_point(size=1.75) +
geom_abline(intercept=0,slope=1,lty=2,col="darkgrey",lwd=1) +
ggtitle(paste("ROC curve for normalised ELISA OD vs. PCR as gold standard with threshold\nCt = 32 (AUC = ",round(auc,digits=2),").",sep="")) +
theme_light() +
theme(text=element_text(size=18)) +
geom_hline(mapping=aes(yintercept=bestSens),lwd=1.2,lty=3,col="orange") +
geom_vline(mapping=aes(xintercept=1-bestSpec),lwd=1.2,lty=3,col="orange") +
geom_text(mapping=aes(x=0.03,y=0.5,label=paste(sep="","J = ",format(nsmall=2,round(digits=2,maxJ)))),col="orange",size=7)
print(g)
png(file=paste(sep="",outPrefix,"_Fig2_ROC_Ct32.png"),height=12,width=12,units="in",res=450)
print(g)
dev.off()
## quartz_off_screen
## 2
Note that the AUC above is for the actual area under the plotted curve. The AUC can be interpreted as the probability of a randomly chosen positive sample having a larger value in the evaluated diagnostic than a randomly chosen negative sample. Calculating the AUC this way directly, gives the same result.
(Though note that this latter calculation currently ignores the correlated / longitudinal nature of the data).
# Probabilistic AUC (Ct threshold 35): P(ELISA OD of a random qPCR-positive
# sample > that of a random qPCR-negative sample), ties counted as 1/2.
# Computed exactly over all positive/negative pairs instead of 1e7 unseeded
# Monte-Carlo draws -- deterministic (reproducible) and much faster.
datNoNA<-dat[!is.na(dat$PCRCT.Ct) & !is.na(dat$ELISA.OD.norm),]
pos<-datNoNA$ELISA.OD.norm[datNoNA$PCRCT.Ct<=35]
neg<-datNoNA$ELISA.OD.norm[datNoNA$PCRCT.Ct>35]
pairDiff<-outer(pos,neg,FUN="-")
probaAUC<-(sum(pairDiff>0) + sum(pairDiff==0)/2)/length(pairDiff)
cat(paste(sep="","The probabilistic AUC (Ct threshold 35) is ",round(digits=2,probaAUC),".\n"))
## The probabilistic AUC (Ct threshold 35) is 0.65.
# Probabilistic AUC (Ct threshold 32), computed exactly over all pairs as above
# (deterministic, no resampling noise).
datNoNA<-dat[!is.na(dat$PCRCT.Ct) & !is.na(dat$ELISA.OD.norm),]
pos<-datNoNA$ELISA.OD.norm[datNoNA$PCRCT.Ct<=32]
neg<-datNoNA$ELISA.OD.norm[datNoNA$PCRCT.Ct>32]
pairDiff<-outer(pos,neg,FUN="-")
probaAUC<-(sum(pairDiff>0) + sum(pairDiff==0)/2)/length(pairDiff)
cat(paste(sep="","The probabilistic AUC (Ct threshold 32) is ",round(digits=2,probaAUC),".\n"))
## The probabilistic AUC (Ct threshold 32) is 0.69.
We can also calculate Youden’s J index (J = sensitivity + specificity − 1). For a given decision threshold, it measures how much more likely a positive result (i.e. a result based on ELISA normalised OD being above the decision threshold) is for qPCR-positive samples than for qPCR-negative samples. Of specific interest are the maximum value of Youden’s J and what is the ELISA OD decision threshold associated with that.
In our case, the maximum Youden’s J is achieved for sensitivity 0.47 and specificity 0.92, corresponding to a median ELISA normalised OD threshold of 0.0035.
The maximum Youden index is indicated in orange on the ROC curve above.
# Dichotomise ELISA at raw OD > 0.09 (assay cut-off -- TODO confirm source of 0.09).
dat$ELISA.bin<-ifelse(dat$ELISA.OD>0.09,1,0)
# With a binary diagnostic and nEval=3, the middle threshold (row 2) separates
# the 0s from the 1s; keep columns sensitivity..NPV of that row.
ELISAbinSensSpec35<-compSensSpec(dat=dat[!is.element(el=dat$PCR.Visit,set=c(3,5)),],timeVar="PCR.Visit",goldStdVar="PCRCT.Ct",diagVar="ELISA.bin",goldStdThreshold=35,nEval=3,logScale=F,logScaleOffSet=0)[2,2:5]
ELISAbinSensSpec32<-compSensSpec(dat=dat[!is.element(el=dat$PCR.Visit,set=c(3,5)),],timeVar="PCR.Visit",goldStdVar="PCRCT.Ct",diagVar="ELISA.bin",goldStdThreshold=32,nEval=3,logScale=F,logScaleOffSet=0)[2,2:5]
# Point estimates, one row per qPCR gold-standard threshold.
sensSpecPpvNpv<-data.frame(
qPCR_Thr=c(32,35),
sensitivity=c(ELISAbinSensSpec32$sensitivity,ELISAbinSensSpec35$sensitivity),
specificity=c(ELISAbinSensSpec32$specificity,ELISAbinSensSpec35$specificity),
PPV=c(ELISAbinSensSpec32$PPV,ELISAbinSensSpec35$PPV),
NPV=c(ELISAbinSensSpec32$NPV,ELISAbinSensSpec35$NPV)
)
## Nonparametric bootstrap for sensitivity/specificity/PPV/NPV.
## NOTE(review): rows are resampled independently, which ignores the repeated
## measures per patient -- confirm whether a cluster (patient-level) bootstrap
## would be preferable.
B<-1e5
# Preallocate instead of growing the list one element at a time.
sensSpecPpvNpvBS<-vector("list",B)
for(b in seq_len(B)){
  # Resample rows with replacement, then recompute both summary rows.
  datTmp<-dat[sample(x=seq_len(nrow(dat)),size=nrow(dat),replace=T),]
  ELISAbinSensSpec35Tmp<-compSensSpec(dat=datTmp[!is.element(el=datTmp$PCR.Visit,set=c(3,5)),],timeVar="PCR.Visit",goldStdVar="PCRCT.Ct",diagVar="ELISA.bin",goldStdThreshold=35,nEval=3,logScale=F,logScaleOffSet=0)[2,2:5]
  ELISAbinSensSpec32Tmp<-compSensSpec(dat=datTmp[!is.element(el=datTmp$PCR.Visit,set=c(3,5)),],timeVar="PCR.Visit",goldStdVar="PCRCT.Ct",diagVar="ELISA.bin",goldStdThreshold=32,nEval=3,logScale=F,logScaleOffSet=0)[2,2:5]
  sensSpecPpvNpvBS[[b]]<-data.frame(
    qPCR_Thr=c(32,35),
    sensitivity=c(ELISAbinSensSpec32Tmp$sensitivity,ELISAbinSensSpec35Tmp$sensitivity),
    specificity=c(ELISAbinSensSpec32Tmp$specificity,ELISAbinSensSpec35Tmp$specificity),
    PPV=c(ELISAbinSensSpec32Tmp$PPV,ELISAbinSensSpec35Tmp$PPV),
    NPV=c(ELISAbinSensSpec32Tmp$NPV,ELISAbinSensSpec35Tmp$NPV)
  )
}
## Format the bootstrap 95% percentile CIs as "(lo,hi)" strings, one per cell.
sensSpecPpvNpvCI<-sensSpecPpvNpv
for(i in seq_len(nrow(sensSpecPpvNpvCI))){
  for(j in 2:ncol(sensSpecPpvNpvCI)){
    # Collect the bootstrap replicates of metric j for gold-standard row i.
    tt<-vapply(sensSpecPpvNpvBS,function(bs)bs[i,j],numeric(1))
    ci<-quantile(tt,probs=c(0.025,0.975),na.rm=T)
    sensSpecPpvNpvCI[i,j]<-paste(sep="","(",format(nsmall=3,round(digits=3,ci[1])),",",format(nsmall=3,round(digits=3,ci[2])),")")
  }
}
# Point-estimate table (caption fixed: second metric is NPV, not PPV).
kable(sensSpecPpvNpv,caption="Sensitivity, specificity, positive predictive value (PPV) and negative predictive value (NPV) for ELISA assuming qPCR as the gold standard.", col.names=c("qPCR Threshold","Sensitivity","Specificity","PPV","NPV"),digits=3) %>%
  kable_styling(full_width=F)
| qPCR Threshold | Sensitivity | Specificity | PPV | NPV |
|---|---|---|---|---|
| 32 | 0.353 | 0.92 | 0.953 | 0.235 |
| 35 | 0.326 | 1.00 | 1.000 | 0.092 |
# Bootstrap CI table (caption fixed: second metric is NPV, not PPV).
kable(sensSpecPpvNpvCI,caption="Bootstrapped 95% confidence intervals for the sensitivity, specificity, positive predictive value (PPV) and negative predictive value (NPV) for ELISA assuming qPCR as the gold standard.", col.names=c("qPCR Threshold","Sensitivity","Specificity","PPV","NPV"),digits=3) %>%
  kable_styling(full_width=F)
| qPCR Threshold | Sensitivity | Specificity | PPV | NPV |
|---|---|---|---|---|
| 32 | (0.267,0.442) | (0.800,1.000) | (0.881,1.000) | (0.153,0.322) |
| 35 | (0.246,0.407) | (1.000,1.000) | (1.000,1.000) | (0.040,0.152) |
First, the tables from Thomas Conrad’s presentation at the lake conference.
Note:
The table for RDT vs qPCR uses a qPCR threshold of 35 and it was not possible to generate the table for a threshold of 32. qPCR Ct values are only available for the enrolled subjects and hence I could not re-compute the qPCR positive/negative result for a new threshold from the data file that was shared with me.
# Cross-tabulate pre-screening RDT results against qPCR results.
# (To inspect missing categories: table(preScreen$RDT.Result, preScreen$qPCR.Result, useNA = "ifany"))
rdtByPcr <- table(preScreen$RDT.Result, preScreen$qPCR.Result)
tt <- data.frame(
  var1 = "RDT",
  RDT = rownames(rdtByPcr),
  PCRneg = rdtByPcr[, "negative"],
  PCRpos = rdtByPcr[, "positive"]
)
# Render with a spanning "qPCR" header over the two count columns and the
# RDT row labels bolded / collapsed.
kable(tt, caption = "", row.names = FALSE, col.names = c("", "", "negative", "positive")) %>%
  kable_styling(full_width = FALSE) %>%
  add_header_above(c(" " = 2, "qPCR" = 2)) %>%
  column_spec(2, bold = TRUE) %>%
  collapse_rows(columns = 1, valign = "middle")
| negative | positive | ||
|---|---|---|---|
| RDT | negative | 483 | 54 |
| positive | 0 | 20 |
# Cross-tabulate pre-screening RDT results against ELISA results.
# (To inspect missing categories: table(preScreen$RDT.Result, preScreen$ELISA.Result, useNA = "ifany"))
rdtByElisa <- table(preScreen$RDT.Result, preScreen$ELISA.Result)
tt <- data.frame(
  var1 = "RDT",
  RDT = rownames(rdtByElisa),
  ELISAneg = rdtByElisa[, "negative"],
  ELISApos = rdtByElisa[, "positive"]
)
# Render with a spanning "ELISA" header over the two count columns and the
# RDT row labels bolded / collapsed.
kable(tt, caption = "", row.names = FALSE, col.names = c("", "", "negative", "positive")) %>%
  kable_styling(full_width = FALSE) %>%
  add_header_above(c(" " = 2, "ELISA" = 2)) %>%
  column_spec(2, bold = TRUE) %>%
  collapse_rows(columns = 1, valign = "middle")
| negative | positive | ||
|---|---|---|---|
| RDT | negative | 14 | 2 |
| positive | 1 | 5 |
Note that this is virtually the same as in Thomas’ presentation, except that there are 5 more observations that are both RDT- and qPCR-negative than in Tom’s table (which contained only 468 of these).
# Recode each assay as 1 = negative, 2 = positive: poLCA requires categorical
# outcomes coded as consecutive integers starting at 1.
tmpDat <- data.frame(
  rdt = ifelse(preScreen$RDT.Result == "positive", 2, 1),
  qpcr = ifelse(preScreen$qPCR.Result == "positive", 2, 1),
  elisa = ifelse(preScreen$ELISA.Result == "positive", 2, 1)
)
# Two-class latent class model with no covariates (~ 1).
# na.rm = FALSE keeps partially observed cases in the likelihood;
# nrep = 5 random restarts guard against local maxima.
f <- cbind(rdt, qpcr, elisa) ~ 1
poLCA(f, tmpDat, nclass = 2, verbose = FALSE, na.rm = FALSE, maxiter = 1e5, nrep = 5)
## Conditional item response (column) probabilities,
## by outcome variable, for each class (row)
##
## $rdt
## Pr(1) Pr(2)
## class 1: 1.0000 0.0000
## class 2: 0.3052 0.6948
##
## $qpcr
## Pr(1) Pr(2)
## class 1: 0.9145 0.0855
## class 2: 0.0000 1.0000
##
## $elisa
## Pr(1) Pr(2)
## class 1: 1.0000 0.0000
## class 2: 0.1667 0.8333
##
## Estimated class population shares
## 0.9493 0.0507
##
## Predicted class memberships (by modal posterior prob.)
## 0.9624 0.0376
##
## =========================================================
## Fit for 2 latent classes:
## =========================================================
## number of observations: 585
## number of fully observed cases: 22
## number of estimated parameters: 7
## residual degrees of freedom: 0
## maximum log-likelihood: -270.6043
##
## AIC(2): 555.2086
## BIC(2): 585.8099
## G^2(2): 77.27358 (Likelihood ratio/deviance statistic)
## X^2(2): 130.3341 (Chi-square goodness of fit)
##
cat(file=logFile,append=T,"This is the end.\n")
## This is the end.